diff options
318 files changed, 9225 insertions, 5846 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cc3e02e03a..1325ab7124b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -519,18 +519,20 @@ endif() option(WITH_LEGACY_DEPSGRAPH "Build Blender with legacy dependency graph" ON) mark_as_advanced(WITH_LEGACY_DEPSGRAPH) -# Use hardcoded paths or find_package to find externals -option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF) -mark_as_advanced(WITH_WINDOWS_FIND_MODULES) +if(WIN32) + # Use hardcoded paths or find_package to find externals + option(WITH_WINDOWS_FIND_MODULES "Use find_package to locate libraries" OFF) + mark_as_advanced(WITH_WINDOWS_FIND_MODULES) -option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF) -mark_as_advanced(WITH_WINDOWS_CODESIGN) + option(WITH_WINDOWS_CODESIGN "Use signtool to sign the final binary." OFF) + mark_as_advanced(WITH_WINDOWS_CODESIGN) -set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.") -mark_as_advanced(WINDOWS_CODESIGN_PFX) + set(WINDOWS_CODESIGN_PFX CACHE FILEPATH "Path to pfx file to use for codesigning.") + mark_as_advanced(WINDOWS_CODESIGN_PFX) -set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.") -mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD) + set(WINDOWS_CODESIGN_PFX_PASSWORD CACHE STRING "password for pfx file used for codesigning.") + mark_as_advanced(WINDOWS_CODESIGN_PFX_PASSWORD) +endif() # avoid using again option_defaults_clear() diff --git a/build_files/cmake/packaging.cmake b/build_files/cmake/packaging.cmake index c7063ed6772..e8621bc457a 100644 --- a/build_files/cmake/packaging.cmake +++ b/build_files/cmake/packaging.cmake @@ -1,5 +1,7 @@ -set(PROJECT_DESCRIPTION "Blender is a very fast and versatile 3D modeller/renderer.") -set(PROJECT_COPYRIGHT "Copyright (C) 2001-2012 Blender Foundation") +string(TIMESTAMP CURRENT_YEAR "%Y") + +set(PROJECT_DESCRIPTION "Blender is the free and open source 3D creation suite software.") 
+set(PROJECT_COPYRIGHT "Copyright (C) 2001-${CURRENT_YEAR} Blender Foundation") set(PROJECT_CONTACT "foundation@blender.org") set(PROJECT_VENDOR "Blender Foundation") @@ -135,4 +137,3 @@ unset(MINOR_VERSION) unset(PATCH_VERSION) unset(BUILD_REV) - diff --git a/doc/python_api/rst/bge.texture.rst b/doc/python_api/rst/bge.texture.rst index 49f6c4469a4..3028ee653f8 100644 --- a/doc/python_api/rst/bge.texture.rst +++ b/doc/python_api/rst/bge.texture.rst @@ -681,7 +681,7 @@ Image classes .. attribute:: zbuff - Use depth component of render as grey scale color - suitable for texture source. + Use depth component of render as grayscale color - suitable for texture source. :type: bool @@ -817,7 +817,7 @@ Image classes .. attribute:: zbuff - Use depth component of viewport as grey scale color - suitable for texture source. + Use depth component of viewport as grayscale color - suitable for texture source. :type: bool @@ -1260,8 +1260,8 @@ Filter classes .. class:: FilterGray - Filter for gray scale effect. - Proportions of R, G and B contributions in the output gray scale are 28:151:77. + Filter for grayscale effect. + Proportions of R, G and B contributions in the output grayscale are 28:151:77. .. 
attribute:: previous diff --git a/doc/python_api/sphinx_doc_gen.py b/doc/python_api/sphinx_doc_gen.py index ec3131ca19e..47bb323e574 100644 --- a/doc/python_api/sphinx_doc_gen.py +++ b/doc/python_api/sphinx_doc_gen.py @@ -427,9 +427,9 @@ if BLENDER_REVISION != "Unknown": BLENDER_VERSION_DOTS += " " + BLENDER_REVISION # '2.62.1 SHA1' BLENDER_VERSION_PATH = "_".join(blender_version_strings) # '2_62_1' -if bpy.app.version_cycle == "release": - BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]), - bpy.app.version_char) # '2_62_release' +if bpy.app.version_cycle in {"rc", "release"}: + # '2_62a_release' + BLENDER_VERSION_PATH = "%s%s_release" % ("_".join(blender_version_strings[:2]), bpy.app.version_char) # --------------------------DOWNLOADABLE FILES---------------------------------- diff --git a/doc/python_api/sphinx_doc_update.py b/doc/python_api/sphinx_doc_update.py index 3d48c1145e1..561e58dec66 100755 --- a/doc/python_api/sphinx_doc_update.py +++ b/doc/python_api/sphinx_doc_update.py @@ -96,6 +96,11 @@ def main(): rsync_base = "rsync://%s@%s:%s" % (args.user, args.rsync_server, args.rsync_root) + blenver = blenver_zip = "" + api_name = "" + branch = "" + is_release = False + # I) Update local mirror using rsync. rsync_mirror_cmd = ("rsync", "--delete-after", "-avzz", rsync_base, args.mirror_dir) subprocess.run(rsync_mirror_cmd, env=dict(os.environ, RSYNC_PASSWORD=args.password)) @@ -108,19 +113,24 @@ def main(): subprocess.run(doc_gen_cmd) # III) Get Blender version info. 
- blenver = blenver_zip = "" getver_file = os.path.join(tmp_dir, "blendver.txt") getver_script = ("" "import sys, bpy\n" "with open(sys.argv[-1], 'w') as f:\n" - " f.write('%d_%d%s_release\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n" - " if bpy.app.version_cycle in {'rc', 'release'} else '%d_%d_%d\\n' % bpy.app.version)\n" - " f.write('%d_%d_%d' % bpy.app.version)\n") + " is_release = bpy.app.version_cycle in {'rc', 'release'}\n" + " branch = bpy.app.build_branch.split()[0].decode()\n" + " f.write('%d\\n' % is_release)\n" + " f.write('%s\\n' % branch)\n" + " f.write('%d.%d%s\\n' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n" + " if is_release else '%s\\n' % branch)\n" + " f.write('%d_%d%s_release' % (bpy.app.version[0], bpy.app.version[1], bpy.app.version_char)\n" + " if is_release else '%d_%d_%d' % bpy.app.version)\n") get_ver_cmd = (args.blender, "--background", "-noaudio", "--factory-startup", "--python-exit-code", "1", "--python-expr", getver_script, "--", getver_file) subprocess.run(get_ver_cmd) with open(getver_file) as f: - blenver, blenver_zip = f.read().split("\n") + is_release, branch, blenver, blenver_zip = f.read().split("\n") + is_release = bool(int(is_release)) os.remove(getver_file) # IV) Build doc. @@ -132,7 +142,7 @@ def main(): os.chdir(curr_dir) # V) Cleanup existing matching dir in server mirror (if any), and copy new doc. - api_name = "blender_python_api_%s" % blenver + api_name = blenver api_dir = os.path.join(args.mirror_dir, api_name) if os.path.exists(api_dir): shutil.rmtree(api_dir) @@ -150,19 +160,15 @@ def main(): os.rename(zip_path, os.path.join(api_dir, "%s.zip" % zip_name)) # VII) Create symlinks and html redirects. 
- #~ os.symlink(os.path.join(DEFAULT_SYMLINK_ROOT, api_name, "contents.html"), os.path.join(api_dir, "index.html")) os.symlink("./contents.html", os.path.join(api_dir, "index.html")) - if blenver.endswith("release"): - symlink = os.path.join(args.mirror_dir, "blender_python_api_current") + if is_release: + symlink = os.path.join(args.mirror_dir, "current") os.remove(symlink) os.symlink("./%s" % api_name, symlink) with open(os.path.join(args.mirror_dir, "250PythonDoc/index.html"), 'w') as f: f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\"" "content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name) - else: - symlink = os.path.join(args.mirror_dir, "blender_python_api_master") - os.remove(symlink) - os.symlink("./%s" % api_name, symlink) + elif branch == "master": with open(os.path.join(args.mirror_dir, "blender_python_api/index.html"), 'w') as f: f.write("<html><head><title>Redirecting...</title><meta http-equiv=\"REFRESH\"" "content=\"0;url=../%s/\"></head><body>Redirecting...</body></html>" % api_name) diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 19087117667..4cce29d38ab 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -114,7 +114,7 @@ extern "C" { #define cuGLGetDevices cuGLGetDevices_v2 /* Types. 
*/ -#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__) typedef unsigned long long CUdeviceptr; #else typedef unsigned int CUdeviceptr; diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h index 1107deddf94..1e9528f9ed9 100644 --- a/intern/atomic/atomic_ops.h +++ b/intern/atomic/atomic_ops.h @@ -101,11 +101,11 @@ ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new); -ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x); -ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x); -ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x); -ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x); -ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new); +ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x); +ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x); +ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x); +ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x); +ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new); /* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation, * which means they are only efficient if collisions are highly unlikely (i.e. 
if probability of two threads diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h index 8421aa72192..b72c94563fc 100644 --- a/intern/atomic/intern/atomic_ops_ext.h +++ b/intern/atomic/intern/atomic_ops_ext.h @@ -113,58 +113,58 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new) /******************************************************************************/ /* unsigned operations. */ -ATOMIC_INLINE unsigned atomic_add_and_fetch_u(unsigned *p, unsigned x) +ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x) { - assert(sizeof(unsigned) == LG_SIZEOF_INT); + assert(sizeof(unsigned int) == LG_SIZEOF_INT); #if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x); + return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x); + return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)x); #endif } -ATOMIC_INLINE unsigned atomic_sub_and_fetch_u(unsigned *p, unsigned x) +ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x) { - assert(sizeof(unsigned) == LG_SIZEOF_INT); + assert(sizeof(unsigned int) == LG_SIZEOF_INT); #if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); + return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); + return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } -ATOMIC_INLINE unsigned atomic_fetch_and_add_u(unsigned *p, unsigned x) +ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int x) { - assert(sizeof(unsigned) == LG_SIZEOF_INT); + assert(sizeof(unsigned int) == LG_SIZEOF_INT); 
#if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x); + return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x); + return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)x); #endif } -ATOMIC_INLINE unsigned atomic_fetch_and_sub_u(unsigned *p, unsigned x) +ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x) { - assert(sizeof(unsigned) == LG_SIZEOF_INT); + assert(sizeof(unsigned int) == LG_SIZEOF_INT); #if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); + return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); + return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } -ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new) +ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsigned int _new) { - assert(sizeof(unsigned) == LG_SIZEOF_INT); + assert(sizeof(unsigned int) == LG_SIZEOF_INT); #if (LG_SIZEOF_INT == 8) - return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); + return (unsigned int)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); #elif (LG_SIZEOF_INT == 4) - return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); + return (unsigned int)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); #endif } diff --git a/intern/audaspace/intern/AUD_SoftwareDevice.cpp b/intern/audaspace/intern/AUD_SoftwareDevice.cpp index 15594d340be..f9d65aa2363 100644 --- a/intern/audaspace/intern/AUD_SoftwareDevice.cpp +++ b/intern/audaspace/intern/AUD_SoftwareDevice.cpp @@ -365,6 +365,7 @@ bool 
AUD_SoftwareDevice::AUD_SoftwareHandle::seek(float position) if(!m_status) return false; + m_pitch->setPitch(m_user_pitch); m_reader->seek((int)(position * m_reader->getSpecs().rate)); if(m_status == AUD_STATUS_STOPPED) diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 5c51f9afc28..ca109734314 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -665,8 +665,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): cls.debug_use_cpu_sse3 = BoolProperty(name="SSE3", default=True) cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True) cls.debug_use_qbvh = BoolProperty(name="QBVH", default=True) + cls.debug_use_cpu_split_kernel = BoolProperty(name="Split Kernel", default=False) cls.debug_use_cuda_adaptive_compile = BoolProperty(name="Adaptive Compile", default=False) + cls.debug_use_cuda_split_kernel = BoolProperty(name="Split Kernel", default=False) cls.debug_opencl_kernel_type = EnumProperty( name="OpenCL Kernel Type", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 44af5f7efed..7c1e3e270fb 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1518,10 +1518,12 @@ class CyclesRender_PT_debug(CyclesButtonsPanel, Panel): row.prop(cscene, "debug_use_cpu_avx", toggle=True) row.prop(cscene, "debug_use_cpu_avx2", toggle=True) col.prop(cscene, "debug_use_qbvh") + col.prop(cscene, "debug_use_cpu_split_kernel") col = layout.column() col.label('CUDA Flags:') col.prop(cscene, "debug_use_cuda_adaptive_compile") + col.prop(cscene, "debug_use_cuda_split_kernel") col = layout.column() col.label('OpenCL Flags:') diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index e42ff5d72a6..ffa5b676917 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -411,6 +411,7 @@ static void 
ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData, } } + mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size()); mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); mesh->attributes.remove(ATTR_STD_FACE_NORMAL); mesh->add_face_normals(); @@ -434,8 +435,8 @@ static void ExportCurveTriangleGeometry(Mesh *mesh, if(CData->curve_keynum[curve] <= 1 || CData->curve_length[curve] == 0.0f) continue; - numverts += (CData->curve_keynum[curve] - 2)*2*resolution + resolution; - numtris += (CData->curve_keynum[curve] - 2)*resolution; + numverts += (CData->curve_keynum[curve] - 1)*resolution + resolution; + numtris += (CData->curve_keynum[curve] - 1)*2*resolution; } } @@ -545,6 +546,7 @@ static void ExportCurveTriangleGeometry(Mesh *mesh, } } + mesh->resize_mesh(mesh->verts.size(), mesh->triangles.size()); mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); mesh->attributes.remove(ATTR_STD_FACE_NORMAL); mesh->add_face_normals(); @@ -890,7 +892,7 @@ void BlenderSync::sync_curves(Mesh *mesh, } /* obtain general settings */ - bool use_curves = scene->curve_system_manager->use_curves; + const bool use_curves = scene->curve_system_manager->use_curves; if(!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT)) { if(!motion) @@ -898,11 +900,11 @@ void BlenderSync::sync_curves(Mesh *mesh, return; } - int primitive = scene->curve_system_manager->primitive; - int triangle_method = scene->curve_system_manager->triangle_method; - int resolution = scene->curve_system_manager->resolution; - size_t vert_num = mesh->verts.size(); - size_t tri_num = mesh->num_triangles(); + const int primitive = scene->curve_system_manager->primitive; + const int triangle_method = scene->curve_system_manager->triangle_method; + const int resolution = scene->curve_system_manager->resolution; + const size_t vert_num = mesh->verts.size(); + const size_t tri_num = mesh->num_triangles(); int used_res = 1; /* extract particle hair data - should be combined with connecting to mesh later*/ diff --git 
a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index e269be61791..fdc287084eb 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -564,7 +564,7 @@ static void attr_create_pointiness(Scene *scene, * original vertex. */ vector<int> sorted_vert_indeices(num_verts); - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + for(int vert_index = 0; vert_index < num_verts; ++vert_index) { sorted_vert_indeices[vert_index] = vert_index; } VertexAverageComparator compare(mesh->verts); @@ -573,9 +573,9 @@ static void attr_create_pointiness(Scene *scene, * index. */ vector<int> vert_orig_index(num_verts); - for (int sorted_vert_index = 0; - sorted_vert_index < num_verts; - ++sorted_vert_index) + for(int sorted_vert_index = 0; + sorted_vert_index < num_verts; + ++sorted_vert_index) { const int vert_index = sorted_vert_indeices[sorted_vert_index]; const float3 &vert_co = mesh->verts[vert_index]; @@ -589,12 +589,12 @@ static void attr_create_pointiness(Scene *scene, const float3 &other_vert_co = mesh->verts[other_vert_index]; /* We are too far away now, we wouldn't have duplicate. */ if ((other_vert_co.x + other_vert_co.y + other_vert_co.z) - - (vert_co.x + vert_co.y + vert_co.z) > 0.0f) + (vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON) { break; } /* Found duplicate. */ - if(other_vert_co == vert_co) { + if(len_squared(other_vert_co - vert_co) < FLT_EPSILON) { found = true; vert_orig_index[vert_index] = other_vert_index; break; @@ -777,6 +777,15 @@ static void create_mesh(Scene *scene, int shader = clamp(f->material_index(), 0, used_shaders.size()-1); bool smooth = f->use_smooth() || use_loop_normals; + if(use_loop_normals) { + BL::Array<float, 12> loop_normals = f->split_normals(); + for(int i = 0; i < n; i++) { + N[vi[i]] = make_float3(loop_normals[i * 3], + loop_normals[i * 3 + 1], + loop_normals[i * 3 + 2]); + } + } + /* Create triangles. 
* * NOTE: Autosmooth is already taken care about. diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 438abc49f88..75118c43747 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -67,8 +67,10 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene) flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3"); flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2"); flags.cpu.qbvh = get_boolean(cscene, "debug_use_qbvh"); + flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel"); /* Synchronize CUDA flags. */ flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile"); + flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel"); /* Synchronize OpenCL kernel type. */ switch(get_enum(cscene, "debug_opencl_kernel_type")) { case 0: diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h index 8120de96362..23df3c1bc30 100644 --- a/intern/cycles/blender/blender_util.h +++ b/intern/cycles/blender/blender_util.h @@ -79,7 +79,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data, me.calc_normals_split(); } else { - me.split_faces(); + me.split_faces(false); } } if(subdivision_type == Mesh::SUBDIVISION_NONE) { diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 874a4246d1d..1fb2f371a0f 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -81,6 +81,7 @@ void BVH::build(Progress& progress) pack.prim_type, pack.prim_index, pack.prim_object, + pack.prim_time, params, progress); BVHNode *root = bvh_build.run(); @@ -256,6 +257,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) pack.leaf_nodes.resize(leaf_nodes_size); pack.object_node.resize(objects.size()); + if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) { + pack.prim_time.resize(prim_index_size); + } + int *pack_prim_index = 
(pack.prim_index.size())? &pack.prim_index[0]: NULL; int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL; int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL; @@ -264,6 +269,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL; int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL; int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL; + float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL; /* merge */ foreach(Object *ob, objects) { @@ -309,6 +315,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) int *bvh_prim_type = &bvh->pack.prim_type[0]; uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0]; uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0]; + float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL; for(size_t i = 0; i < bvh_prim_index_size; i++) { if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { @@ -324,6 +331,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i]; pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i]; pack_prim_object[pack_prim_index_offset] = 0; // unused for instances + if(bvh_prim_time != NULL) { + pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i]; + } pack_prim_index_offset++; } } diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index 35f4d305883..08f41fc736f 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -68,6 +68,8 @@ struct PackedBVH { array<int> prim_index; /* mapping from BVH primitive index, to the object id of that primitive. */ array<int> prim_object; + /* Time range of BVH primitive. */ + array<float2> prim_time; /* index of the root node. 
*/ int root_index; diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index a2f8b33cb0b..517afc75641 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -93,12 +93,14 @@ BVHBuild::BVHBuild(const vector<Object*>& objects_, array<int>& prim_type_, array<int>& prim_index_, array<int>& prim_object_, + array<float2>& prim_time_, const BVHParams& params_, Progress& progress_) : objects(objects_), prim_type(prim_type_), prim_index(prim_index_), prim_object(prim_object_), + prim_time(prim_time_), params(params_), progress(progress_), progress_start_time(0.0), @@ -465,6 +467,9 @@ BVHNode* BVHBuild::run() } spatial_free_index = 0; + need_prim_time = params.num_motion_curve_steps > 0 || + params.num_motion_triangle_steps > 0; + /* init progress updates */ double build_start_time; build_start_time = progress_start_time = time_dt(); @@ -475,6 +480,12 @@ BVHNode* BVHBuild::run() prim_type.resize(references.size()); prim_index.resize(references.size()); prim_object.resize(references.size()); + if(need_prim_time) { + prim_time.resize(references.size()); + } + else { + prim_time.resize(0); + } /* build recursively */ BVHNode *rootnode; @@ -849,6 +860,9 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, prim_type[start] = ref->prim_type(); prim_index[start] = ref->prim_index(); prim_object[start] = ref->prim_object(); + if(need_prim_time) { + prim_time[start] = make_float2(ref->time_from(), ref->time_to()); + } uint visibility = objects[ref->prim_object()]->visibility; BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start+1); @@ -891,11 +905,13 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, * can not control. 
*/ typedef StackAllocator<256, int> LeafStackAllocator; + typedef StackAllocator<256, float2> LeafTimeStackAllocator; typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator; vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL]; vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL]; vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL]; + vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL]; vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL]; /* TODO(sergey): In theory we should be able to store references. */ @@ -918,6 +934,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, p_type[type_index].push_back(ref.prim_type()); p_index[type_index].push_back(ref.prim_index()); p_object[type_index].push_back(ref.prim_object()); + p_time[type_index].push_back(make_float2(ref.time_from(), + ref.time_to())); bounds[type_index].grow(ref.bounds()); visibility[type_index] |= objects[ref.prim_object()]->visibility; @@ -947,9 +965,13 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object; + vector<float2, LeafTimeStackAllocator> local_prim_time; local_prim_type.resize(num_new_prims); local_prim_index.resize(num_new_prims); local_prim_object.resize(num_new_prims); + if(need_prim_time) { + local_prim_time.resize(num_new_prims); + } for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) { int num = (int)p_type[i].size(); if(num != 0) { @@ -962,6 +984,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, local_prim_type[index] = p_type[i][j]; local_prim_index[index] = p_index[i][j]; local_prim_object[index] = p_object[i][j]; + if(need_prim_time) { + local_prim_time[index] = p_time[i][j]; + } if(params.use_unaligned_nodes && !alignment_found) { alignment_found = unaligned_heuristic.compute_aligned_space(p_ref[i][j], @@ -1028,11 +1053,17 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, 
prim_type.reserve(reserve); prim_index.reserve(reserve); prim_object.reserve(reserve); + if(need_prim_time) { + prim_time.reserve(reserve); + } } prim_type.resize(range_end); prim_index.resize(range_end); prim_object.resize(range_end); + if(need_prim_time) { + prim_time.resize(range_end); + } } spatial_spin_lock.unlock(); @@ -1041,6 +1072,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size); memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size); memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size); + if(need_prim_time) { + memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data); + } } } else { @@ -1053,6 +1087,9 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range, memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size); memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size); memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size); + if(need_prim_time) { + memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data); + } } } diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h index ee3cde66a2f..430efc3e0f6 100644 --- a/intern/cycles/bvh/bvh_build.h +++ b/intern/cycles/bvh/bvh_build.h @@ -48,6 +48,7 @@ public: array<int>& prim_type, array<int>& prim_index, array<int>& prim_object, + array<float2>& prim_time, const BVHParams& params, Progress& progress); ~BVHBuild(); @@ -112,6 +113,9 @@ protected: array<int>& prim_type; array<int>& prim_index; array<int>& prim_object; + array<float2>& prim_time; + + bool need_prim_time; /* Build parameters. 
*/ BVHParams params; diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h index 65f9da1c194..7b309504728 100644 --- a/intern/cycles/bvh/bvh_params.h +++ b/intern/cycles/bvh/bvh_params.h @@ -104,6 +104,7 @@ public: primitive_mask = PRIMITIVE_ALL; num_motion_curve_steps = 0; + num_motion_triangle_steps = 0; } /* SAH costs */ diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 966ff5e52ba..a2373451696 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -3,6 +3,7 @@ set(INC . ../graph ../kernel + ../kernel/split ../kernel/svm ../kernel/osl ../util @@ -33,6 +34,7 @@ set(SRC device_cuda.cpp device_multi.cpp device_opencl.cpp + device_split_kernel.cpp device_task.cpp ) @@ -56,6 +58,7 @@ set(SRC_HEADERS device_memory.h device_intern.h device_network.h + device_split_kernel.h device_task.h ) diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 31c99f49d6d..6b07b9d04bd 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -80,7 +80,7 @@ Device::~Device() void Device::pixels_alloc(device_memory& mem) { - mem_alloc(mem, MEM_READ_WRITE); + mem_alloc("pixels", mem, MEM_READ_WRITE); } void Device::pixels_copy_from(device_memory& mem, int y, int w, int h) diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index ccee25ae34e..c740cada98b 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -234,7 +234,7 @@ public: Stats &stats; /* regular memory */ - virtual void mem_alloc(device_memory& mem, MemoryType type) = 0; + virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0; virtual void mem_copy_to(device_memory& mem) = 0; virtual void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) = 0; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c8e001ec2fd..06a1568b4d6 100644 --- 
a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -26,10 +26,12 @@ #include "device.h" #include "device_intern.h" +#include "device_split_kernel.h" #include "kernel.h" #include "kernel_compat_cpu.h" #include "kernel_types.h" +#include "split/kernel_split_data.h" #include "kernel_globals.h" #include "osl_shader.h" @@ -41,6 +43,7 @@ #include "util_foreach.h" #include "util_function.h" #include "util_logging.h" +#include "util_map.h" #include "util_opengl.h" #include "util_progress.h" #include "util_system.h" @@ -48,8 +51,93 @@ CCL_NAMESPACE_BEGIN +class CPUDevice; + +class CPUSplitKernel : public DeviceSplitKernel { + CPUDevice *device; +public: + explicit CPUSplitKernel(CPUDevice *device); + + virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + device_memory& kernel_globals, + device_memory& kernel_data_, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flag, + device_memory& work_pool_wgs); + + virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&); + virtual int2 split_kernel_local_size(); + virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); + virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); +}; + class CPUDevice : public Device { + static unordered_map<string, void*> kernel_functions; + + static void register_kernel_function(const char* name, void* func) + { + kernel_functions[name] = func; + } + + static const char* get_arch_name() + { +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + if(system_cpu_support_avx2()) { + return "cpu_avx2"; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX + if(system_cpu_support_avx()) { + return "cpu_avx"; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + if(system_cpu_support_sse41()) { + return 
"cpu_sse41"; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + if(system_cpu_support_sse3()) { + return "cpu_sse3"; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + if(system_cpu_support_sse2()) { + return "cpu_sse2"; + } + else +#endif + { + return "cpu"; + } + } + + template<typename F> + static F get_kernel_function(string name) + { + name = string("kernel_") + get_arch_name() + "_" + name; + + unordered_map<string, void*>::iterator it = kernel_functions.find(name); + + if(it == kernel_functions.end()) { + assert(!"kernel function not found"); + return NULL; + } + + return (F)it->second; + } + + friend class CPUSplitKernel; + public: TaskPool task_pool; KernelGlobals kernel_globals; @@ -57,10 +145,15 @@ public: #ifdef WITH_OSL OSLGlobals osl_globals; #endif + + bool use_split_kernel; + + DeviceRequestedFeatures requested_features; CPUDevice(DeviceInfo& info, Stats &stats, bool background) : Device(info, stats, background) { + #ifdef WITH_OSL kernel_globals.osl = &osl_globals; #endif @@ -105,6 +198,28 @@ public: { VLOG(1) << "Will be using regular kernels."; } + + use_split_kernel = DebugFlags().cpu.split_kernel; + if(use_split_kernel) { + VLOG(1) << "Will be using split kernel."; + } + + kernel_cpu_register_functions(register_kernel_function); +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + kernel_cpu_sse2_register_functions(register_kernel_function); +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + kernel_cpu_sse3_register_functions(register_kernel_function); +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + kernel_cpu_sse41_register_functions(register_kernel_function); +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX + kernel_cpu_avx_register_functions(register_kernel_function); +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + kernel_cpu_avx2_register_functions(register_kernel_function); +#endif } ~CPUDevice() @@ -117,9 +232,20 @@ public: return (TaskScheduler::num_threads() == 1); } - void mem_alloc(device_memory& mem, 
MemoryType /*type*/) + void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) { + if(name) { + VLOG(1) << "Buffer allocate: " << name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + } + mem.device_pointer = mem.data_pointer; + + if(!mem.device_pointer) { + mem.device_pointer = (device_ptr)malloc(mem.memory_size()); + } + mem.device_size = mem.memory_size(); stats.mem_alloc(mem.device_size); } @@ -144,6 +270,10 @@ public: void mem_free(device_memory& mem) { if(mem.device_pointer) { + if(!mem.data_pointer) { + free((void*)mem.device_pointer); + } + mem.device_pointer = 0; stats.mem_free(mem.device_size); mem.device_size = 0; @@ -196,8 +326,14 @@ public: void thread_run(DeviceTask *task) { - if(task->type == DeviceTask::PATH_TRACE) - thread_path_trace(*task); + if(task->type == DeviceTask::PATH_TRACE) { + if(!use_split_kernel) { + thread_path_trace(*task); + } + else { + thread_path_trace_split(*task); + } + } else if(task->type == DeviceTask::FILM_CONVERT) thread_film_convert(*task); else if(task->type == DeviceTask::SHADER) @@ -258,7 +394,7 @@ public: { path_trace_kernel = kernel_cpu_path_trace; } - + while(task.acquire_tile(this, tile)) { float *render_buffer = (float*)tile.buffer; uint *rng_state = (uint*)tile.rng_state; @@ -294,6 +430,49 @@ public: thread_kernel_globals_free(&kg); } + void thread_path_trace_split(DeviceTask& task) + { + if(task_pool.canceled()) { + if(task.need_finish_queue == false) + return; + } + + RenderTile tile; + + CPUSplitKernel split_kernel(this); + + /* allocate buffer for kernel globals */ + device_memory kgbuffer; + kgbuffer.resize(sizeof(KernelGlobals)); + mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); + + KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer; + *kg = thread_kernel_globals_init(); + + requested_features.max_closure = MAX_CLOSURE; + if(!split_kernel.load_kernels(requested_features)) { + 
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); + mem_free(kgbuffer); + + return; + } + + while(task.acquire_tile(this, tile)) { + device_memory data; + split_kernel.path_trace(&task, tile, kgbuffer, data); + + task.release_tile(tile); + + if(task_pool.canceled()) { + if(task.need_finish_queue == false) + break; + } + } + + thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); + mem_free(kgbuffer); + } + void thread_film_convert(DeviceTask& task) { float sample_scale = 1.0f/(task.sample + 1); @@ -501,6 +680,10 @@ protected: inline void thread_kernel_globals_free(KernelGlobals *kg) { + if(kg == NULL) { + return; + } + if(kg->transparent_shadow_intersections != NULL) { free(kg->transparent_shadow_intersections); } @@ -515,8 +698,176 @@ protected: OSLShader::thread_free(kg); #endif } + + virtual bool load_kernels(DeviceRequestedFeatures& requested_features_) { + requested_features = requested_features_; + + return true; + } +}; + +/* split kernel */ + +class CPUSplitKernelFunction : public SplitKernelFunction { +public: + CPUDevice* device; + void (*func)(KernelGlobals *kg, KernelData *data); + + CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {} + ~CPUSplitKernelFunction() {} + + virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data) + { + if(!func) { + return false; + } + + KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer; + kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]); + + for(int y = 0; y < dim.global_size[1]; y++) { + for(int x = 0; x < dim.global_size[0]; x++) { + kg->global_id = make_int2(x, y); + + func(kg, (KernelData*)data.device_pointer); + } + } + + return true; + } }; +CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device) +{ +} + +bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + 
device_memory& kernel_globals, + device_memory& data, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flags, + device_memory& work_pool_wgs) +{ + typedef void(*data_init_t)(KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + ccl_global uint *rng_state, + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, + ccl_global int *Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer); + + data_init_t data_init; + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + if(system_cpu_support_avx2()) { + data_init = kernel_cpu_avx2_data_init; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX + if(system_cpu_support_avx()) { + data_init = kernel_cpu_avx_data_init; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + if(system_cpu_support_sse41()) { + data_init = kernel_cpu_sse41_data_init; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + if(system_cpu_support_sse3()) { + data_init = kernel_cpu_sse3_data_init; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + if(system_cpu_support_sse2()) { + data_init = kernel_cpu_sse2_data_init; + } + else +#endif + { + data_init = kernel_cpu_data_init; + } + + KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer; + kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]); + + for(int y = 0; y < dim.global_size[1]; y++) { + for(int x = 0; x < dim.global_size[0]; x++) { + kg->global_id = make_int2(x, y); + + data_init((KernelGlobals*)kernel_globals.device_pointer, + (KernelData*)data.device_pointer, + (void*)split_data.device_pointer, + num_global_elements, + (char*)ray_state.device_pointer, + (uint*)rtile.rng_state, + rtile.start_sample, + rtile.start_sample + rtile.num_samples, + 
rtile.x, + rtile.y, + rtile.w, + rtile.h, + rtile.offset, + rtile.stride, + (int*)queue_index.device_pointer, + dim.global_size[0] * dim.global_size[1], + (char*)use_queues_flags.device_pointer, + (uint*)work_pool_wgs.device_pointer, + rtile.num_samples, + (float*)rtile.buffer); + } + } + + return true; +} + +SplitKernelFunction* CPUSplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) +{ + CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device); + + kernel->func = device->get_kernel_function<void(*)(KernelGlobals*, KernelData*)>(kernel_name); + if(!kernel->func) { + delete kernel; + return NULL; + } + + return kernel; +} + +int2 CPUSplitKernel::split_kernel_local_size() +{ + return make_int2(1, 1); +} + +int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask *task) { + /* TODO(mai): this needs investigation but cpu gives incorrect render if global size doesnt match tile size */ + return task->requested_tile_size; +} + +size_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) { + KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer; + + return split_data_buffer_size(kg, num_threads); +} + +unordered_map<string, void*> CPUDevice::kernel_functions; + Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background) { return new CPUDevice(info, stats, background); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index dafac6dfcb3..a630a3d1183 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -15,12 +15,14 @@ */ #include <climits> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "device.h" #include "device_intern.h" +#include "device_split_kernel.h" #include "buffers.h" @@ -42,6 +44,8 @@ #include "util_types.h" #include "util_time.h" +#include 
"split/kernel_split_data_types.h" + CCL_NAMESPACE_BEGIN #ifndef WITH_CUDA_DYNLOAD @@ -78,6 +82,31 @@ int cuewCompilerVersion(void) } /* namespace */ #endif /* WITH_CUDA_DYNLOAD */ +class CUDADevice; + +class CUDASplitKernel : public DeviceSplitKernel { + CUDADevice *device; +public: + explicit CUDASplitKernel(CUDADevice *device); + + virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads); + + virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + device_memory& kernel_globals, + device_memory& kernel_data_, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flag, + device_memory& work_pool_wgs); + + virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&); + virtual int2 split_kernel_local_size(); + virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); +}; + class CUDADevice : public Device { public: @@ -258,11 +287,16 @@ public: return DebugFlags().cuda.adaptive_compile; } + bool use_split_kernel() + { + return DebugFlags().cuda.split_kernel; + } + /* Common NVCC flags which stays the same regardless of shading model, * kernel sources md5 and only depends on compiler or compilation settings. 
*/ string compile_kernel_get_common_cflags( - const DeviceRequestedFeatures& requested_features) + const DeviceRequestedFeatures& requested_features, bool split=false) { const int cuda_version = cuewCompilerVersion(); const int machine = system_cpu_bits(); @@ -287,6 +321,11 @@ public: #ifdef WITH_CYCLES_DEBUG cflags += " -D__KERNEL_DEBUG__"; #endif + + if(split) { + cflags += " -D__SPLIT__"; + } + return cflags; } @@ -320,7 +359,7 @@ public: return true; } - string compile_kernel(const DeviceRequestedFeatures& requested_features) + string compile_kernel(const DeviceRequestedFeatures& requested_features, bool split=false) { /* Compute cubin name. */ int major, minor; @@ -329,7 +368,8 @@ public: /* Attempt to use kernel provided with Blender. */ if(!use_adaptive_compilation()) { - const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", + const string cubin = path_get(string_printf(split ? "lib/kernel_split_sm_%d%d.cubin" + : "lib/kernel_sm_%d%d.cubin", major, minor)); VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; if(path_exists(cubin)) { @@ -339,7 +379,7 @@ public: } const string common_cflags = - compile_kernel_get_common_cflags(requested_features); + compile_kernel_get_common_cflags(requested_features, split); /* Try to use locally compiled kernel. */ const string kernel_path = path_get("kernel"); @@ -350,7 +390,8 @@ public: */ const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags); - const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin", + const string cubin_file = string_printf(split ? "cycles_kernel_split_sm%d%d_%s.cubin" + : "cycles_kernel_sm%d%d_%s.cubin", major, minor, cubin_md5.c_str()); const string cubin = path_cache_get(path_join("kernels", cubin_file)); @@ -385,7 +426,7 @@ public: const char *nvcc = cuewCompilerPath(); const string kernel = path_join(kernel_path, path_join("kernels", - path_join("cuda", "kernel.cu"))); + path_join("cuda", split ? 
"kernel_split.cu" : "kernel.cu"))); double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); @@ -433,7 +474,7 @@ public: return false; /* get kernel */ - string cubin = compile_kernel(requested_features); + string cubin = compile_kernel(requested_features, use_split_kernel()); if(cubin == "") return false; @@ -466,8 +507,14 @@ public: } } - void mem_alloc(device_memory& mem, MemoryType /*type*/) + void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) { + if(name) { + VLOG(1) << "Buffer allocate: " << name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + } + cuda_push_context(); CUdeviceptr device_pointer; size_t size = mem.memory_size(); @@ -504,7 +551,9 @@ public: void mem_zero(device_memory& mem) { - memset((void*)mem.data_pointer, 0, mem.memory_size()); + if(mem.data_pointer) { + memset((void*)mem.data_pointer, 0, mem.memory_size()); + } cuda_push_context(); if(mem.device_pointer) @@ -617,7 +666,7 @@ public: /* Data Storage */ if(interpolation == INTERPOLATION_NONE) { if(has_bindless_textures) { - mem_alloc(mem, MEM_READ_ONLY); + mem_alloc(NULL, mem, MEM_READ_ONLY); mem_copy_to(mem); cuda_push_context(); @@ -641,7 +690,7 @@ public: cuda_pop_context(); } else { - mem_alloc(mem, MEM_READ_ONLY); + mem_alloc(NULL, mem, MEM_READ_ONLY); mem_copy_to(mem); cuda_push_context(); @@ -1258,25 +1307,48 @@ public: /* Upload Bindless Mapping */ load_bindless_mapping(); - /* keep rendering tiles until done */ - while(task->acquire_tile(this, tile)) { - int start_sample = tile.start_sample; - int end_sample = tile.start_sample + tile.num_samples; + if(!use_split_kernel()) { + /* keep rendering tiles until done */ + while(task->acquire_tile(this, tile)) { + int start_sample = tile.start_sample; + int end_sample = tile.start_sample + tile.num_samples; - for(int sample = start_sample; sample < end_sample; sample++) { - if(task->get_cancel()) { - 
if(task->need_finish_queue == false) - break; - } + for(int sample = start_sample; sample < end_sample; sample++) { + if(task->get_cancel()) { + if(task->need_finish_queue == false) + break; + } - path_trace(tile, sample, branched); + path_trace(tile, sample, branched); - tile.sample = sample + 1; + tile.sample = sample + 1; - task->update_progress(&tile, tile.w*tile.h); + task->update_progress(&tile, tile.w*tile.h); + } + + task->release_tile(tile); + } + } + else { + DeviceRequestedFeatures requested_features; + if(!use_adaptive_compilation()) { + requested_features.max_closure = 64; } - task->release_tile(tile); + CUDASplitKernel split_kernel(this); + split_kernel.load_kernels(requested_features); + + while(task->acquire_tile(this, tile)) { + device_memory void_buffer; + split_kernel.path_trace(task, tile, void_buffer, void_buffer); + + task->release_tile(tile); + + if(task->get_cancel()) { + if(task->need_finish_queue == false) + break; + } + } } } else if(task->type == DeviceTask::SHADER) { @@ -1329,8 +1401,223 @@ public: { task_pool.cancel(); } + + friend class CUDASplitKernelFunction; + friend class CUDASplitKernel; }; +/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class + * now that the definition of that class is complete + */ +#undef cuda_assert +#define cuda_assert(stmt) \ + { \ + CUresult result = stmt; \ + \ + if(result != CUDA_SUCCESS) { \ + string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \ + if(device->error_msg == "") \ + device->error_msg = message; \ + fprintf(stderr, "%s\n", message.c_str()); \ + /*cuda_abort();*/ \ + device->cuda_error_documentation(); \ + } \ + } (void)0 + +/* split kernel */ + +class CUDASplitKernelFunction : public SplitKernelFunction{ + CUDADevice* device; + CUfunction func; +public: + CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) {} + + /* enqueue the kernel, returns false if there is an error */ + bool 
enqueue(const KernelDimensions &dim, device_memory &/*kg*/, device_memory &/*data*/) + { + return enqueue(dim, NULL); + } + + /* enqueue the kernel, returns false if there is an error */ + bool enqueue(const KernelDimensions &dim, void *args[]) + { + device->cuda_push_context(); + + if(device->have_error()) + return false; + + /* we ignore dim.local_size for now, as this is faster */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); + + int xthreads = (int)sqrt(threads_per_block); + int ythreads = (int)sqrt(threads_per_block); + + int xblocks = (dim.global_size[0] + xthreads - 1)/xthreads; + int yblocks = (dim.global_size[1] + ythreads - 1)/ythreads; + + cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); + + cuda_assert(cuLaunchKernel(func, + xblocks , yblocks, 1, /* blocks */ + xthreads, ythreads, 1, /* threads */ + 0, 0, args, 0)); + + device->cuda_pop_context(); + + return !device->have_error(); + } +}; + +CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device) +{ +} + +size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads) +{ + device_vector<uint> size_buffer; + size_buffer.resize(1); + device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + + device->cuda_push_context(); + + uint threads = num_threads; + CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); + + struct args_t { + uint* num_threads; + CUdeviceptr* size; + }; + + args_t args = { + &threads, + &d_size + }; + + CUfunction state_buffer_size; + cuda_assert(cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size")); + + cuda_assert(cuLaunchKernel(state_buffer_size, + 1, 1, 1, + 1, 1, 1, + 0, 0, &args, 0)); + + device->cuda_pop_context(); + + device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint)); + device->mem_free(size_buffer); + + return 
*size_buffer.get_data(); +} + +bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + device_memory& /*kernel_globals*/, + device_memory& /*kernel_data*/, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flag, + device_memory& work_pool_wgs) +{ + device->cuda_push_context(); + + CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer); + CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer); + CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer); + CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer); + CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer); + + CUdeviceptr d_rng_state = device->cuda_device_ptr(rtile.rng_state); + CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer); + + int end_sample = rtile.start_sample + rtile.num_samples; + int queue_size = dim.global_size[0] * dim.global_size[1]; + + struct args_t { + CUdeviceptr* split_data_buffer; + int* num_elements; + CUdeviceptr* ray_state; + CUdeviceptr* rng_state; + int* start_sample; + int* end_sample; + int* sx; + int* sy; + int* sw; + int* sh; + int* offset; + int* stride; + CUdeviceptr* queue_index; + int* queuesize; + CUdeviceptr* use_queues_flag; + CUdeviceptr* work_pool_wgs; + int* num_samples; + CUdeviceptr* buffer; + }; + + args_t args = { + &d_split_data, + &num_global_elements, + &d_ray_state, + &d_rng_state, + &rtile.start_sample, + &end_sample, + &rtile.x, + &rtile.y, + &rtile.w, + &rtile.h, + &rtile.offset, + &rtile.stride, + &d_queue_index, + &queue_size, + &d_use_queues_flag, + &d_work_pool_wgs, + &rtile.num_samples, + &d_buffer + }; + + CUfunction data_init; + cuda_assert(cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init")); + if(device->have_error()) { + return false; + } 
+ + CUDASplitKernelFunction(device, data_init).enqueue(dim, (void**)&args); + + device->cuda_pop_context(); + + return !device->have_error(); +} + +SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) +{ + CUfunction func; + + device->cuda_push_context(); + + cuda_assert(cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data())); + if(device->have_error()) { + device->cuda_error_message(string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data())); + return NULL; + } + + device->cuda_pop_context(); + + return new CUDASplitKernelFunction(device, func); +} + +int2 CUDASplitKernel::split_kernel_local_size() +{ + return make_int2(32, 1); +} + +int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask */*task*/) +{ + /* TODO(mai): implement something here to detect ideal work size */ + return make_int2(256, 256); +} + bool device_cuda_init(void) { #ifdef WITH_CUDA_DYNLOAD diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 5b5b4dc6802..b69c3dad604 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -180,10 +180,27 @@ public: /* device pointer */ device_ptr device_pointer; -protected: - device_memory() {} + device_memory() + { + data_type = device_type_traits<uchar>::data_type; + data_elements = device_type_traits<uchar>::num_elements; + data_pointer = 0; + data_size = 0; + device_size = 0; + data_width = 0; + data_height = 0; + data_depth = 0; + device_pointer = 0; + } virtual ~device_memory() { assert(!device_pointer); } + void resize(size_t size) + { + data_size = size; + data_width = size; + } + +protected: /* no copying */ device_memory(const device_memory&); device_memory& operator = (const device_memory&); @@ -198,16 +215,8 @@ public: { data_type = device_type_traits<T>::data_type; data_elements = 
device_type_traits<T>::num_elements; - data_pointer = 0; - data_size = 0; - device_size = 0; - data_width = 0; - data_height = 0; - data_depth = 0; assert(data_elements > 0); - - device_pointer = 0; } virtual ~device_vector() {} @@ -266,6 +275,7 @@ public: data_height = 0; data_depth = 0; data_size = 0; + device_pointer = 0; } size_t size() diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 31b800640d3..3368fd3d756 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -106,11 +106,11 @@ public: return true; } - void mem_alloc(device_memory& mem, MemoryType type) + void mem_alloc(const char *name, device_memory& mem, MemoryType type) { foreach(SubDevice& sub, devices) { mem.device_pointer = 0; - sub.device->mem_alloc(mem, type); + sub.device->mem_alloc(name, mem, type); sub.ptr_map[unique_ptr] = mem.device_pointer; } @@ -162,6 +162,7 @@ public: void mem_free(device_memory& mem) { device_ptr tmp = mem.device_pointer; + stats.mem_free(mem.device_size); foreach(SubDevice& sub, devices) { mem.device_pointer = sub.ptr_map[tmp]; @@ -170,7 +171,6 @@ public: } mem.device_pointer = 0; - stats.mem_free(mem.device_size); } void const_copy_to(const char *name, void *host, size_t size) @@ -202,6 +202,7 @@ public: void tex_free(device_memory& mem) { device_ptr tmp = mem.device_pointer; + stats.mem_free(mem.device_size); foreach(SubDevice& sub, devices) { mem.device_pointer = sub.ptr_map[tmp]; @@ -210,7 +211,6 @@ public: } mem.device_pointer = 0; - stats.mem_free(mem.device_size); } void pixels_alloc(device_memory& mem) diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index 53eef6cf199..6dc4aecbc50 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -87,8 +87,14 @@ public: snd.write(); } - void mem_alloc(device_memory& mem, MemoryType type) + void mem_alloc(const char *name, device_memory& mem, 
MemoryType type) { + if(name) { + VLOG(1) << "Buffer allocate: " << name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + } + thread_scoped_lock lock(rpc_lock); mem.device_pointer = ++mem_counter; @@ -481,7 +487,7 @@ protected: mem.data_pointer = 0; /* perform the allocation on the actual device */ - device->mem_alloc(mem, type); + device->mem_alloc(NULL, mem, type); /* store a mapping to/from client_pointer and real device pointer */ pointer_mapping_insert(client_pointer, mem.device_pointer); diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp new file mode 100644 index 00000000000..b9705077fbf --- /dev/null +++ b/intern/cycles/device/device_split_kernel.cpp @@ -0,0 +1,281 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "device_split_kernel.h" + +#include "kernel_types.h" +#include "kernel_split_data_types.h" + +#include "util_time.h" + +CCL_NAMESPACE_BEGIN + +static const double alpha = 0.1; /* alpha for rolling average */ + +DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) +{ + current_max_closure = -1; + first_tile = true; + + avg_time_per_sample = 0.0; + + kernel_path_init = NULL; + kernel_scene_intersect = NULL; + kernel_lamp_emission = NULL; + kernel_queue_enqueue = NULL; + kernel_background_buffer_update = NULL; + kernel_shader_eval = NULL; + kernel_holdout_emission_blurring_pathtermination_ao = NULL; + kernel_direct_lighting = NULL; + kernel_shadow_blocked = NULL; + kernel_next_iteration_setup = NULL; +} + +DeviceSplitKernel::~DeviceSplitKernel() +{ + device->mem_free(split_data); + device->mem_free(ray_state); + device->mem_free(use_queues_flag); + device->mem_free(queue_index); + device->mem_free(work_pool_wgs); + + delete kernel_path_init; + delete kernel_scene_intersect; + delete kernel_lamp_emission; + delete kernel_queue_enqueue; + delete kernel_background_buffer_update; + delete kernel_shader_eval; + delete kernel_holdout_emission_blurring_pathtermination_ao; + delete kernel_direct_lighting; + delete kernel_shadow_blocked; + delete kernel_next_iteration_setup; +} + +bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features) +{ +#define LOAD_KERNEL(name) \ + kernel_##name = get_split_kernel_function(#name, requested_features); \ + if(!kernel_##name) { \ + return false; \ + } + + LOAD_KERNEL(path_init); + LOAD_KERNEL(scene_intersect); + LOAD_KERNEL(lamp_emission); + LOAD_KERNEL(queue_enqueue); + LOAD_KERNEL(background_buffer_update); + LOAD_KERNEL(shader_eval); + LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao); + LOAD_KERNEL(direct_lighting); + LOAD_KERNEL(shadow_blocked); + LOAD_KERNEL(next_iteration_setup); + +#undef LOAD_KERNEL + + current_max_closure = requested_features.max_closure; + + 
return true; +} + +size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size) +{ + size_t size_per_element = state_buffer_size(kg, data, 1024) / 1024; + return max_buffer_size / size_per_element; +} + +bool DeviceSplitKernel::path_trace(DeviceTask *task, + RenderTile& tile, + device_memory& kgbuffer, + device_memory& kernel_data) +{ + if(device->have_error()) { + return false; + } + + /* Get local size */ + size_t local_size[2]; + { + int2 lsize = split_kernel_local_size(); + local_size[0] = lsize[0]; + local_size[1] = lsize[1]; + } + + /* Set gloabl size */ + size_t global_size[2]; + { + int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task); + + /* Make sure that set work size is a multiple of local + * work size dimensions. + */ + global_size[0] = round_up(gsize[0], local_size[0]); + global_size[1] = round_up(gsize[1], local_size[1]); + } + + /* Number of elements in the global state buffer */ + int num_global_elements = global_size[0] * global_size[1]; + + /* Allocate all required global memory once. */ + if(first_tile) { + first_tile = false; + + /* Calculate max groups */ + + /* Denotes the maximum work groups possible w.r.t. current requested tile size. */ + unsigned int max_work_groups = num_global_elements / WORK_POOL_SIZE + 1; + + /* Allocate work_pool_wgs memory. 
*/ + work_pool_wgs.resize(max_work_groups * sizeof(unsigned int)); + device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE); + + queue_index.resize(NUM_QUEUES * sizeof(int)); + device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE); + + use_queues_flag.resize(sizeof(char)); + device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE); + + ray_state.resize(num_global_elements); + device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE); + + split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); + device->mem_alloc("split_data", split_data, MEM_READ_WRITE); + } + +#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ + if(device->have_error()) { \ + return false; \ + } \ + if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \ + return false; \ + } + + tile.sample = tile.start_sample; + + /* for exponential increase between tile updates */ + int time_multiplier = 1; + + while(tile.sample < tile.start_sample + tile.num_samples) { + /* to keep track of how long it takes to run a number of samples */ + double start_time = time_dt(); + + /* initial guess to start rolling average */ + const int initial_num_samples = 1; + /* approx number of samples per second */ + int samples_per_second = (avg_time_per_sample > 0.0) ? 
+ int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples; + + RenderTile subtile = tile; + subtile.start_sample = tile.sample; + subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample); + + if(device->have_error()) { + return false; + } + + /* reset state memory here as global size for data_init + * kernel might not be large enough to do in kernel + */ + device->mem_zero(work_pool_wgs); + device->mem_zero(split_data); + + if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), + subtile, + num_global_elements, + kgbuffer, + kernel_data, + split_data, + ray_state, + queue_index, + use_queues_flag, + work_pool_wgs)) + { + return false; + } + + ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size); + + bool activeRaysAvailable = true; + + while(activeRaysAvailable) { + /* Twice the global work size of other kernels for + * ckPathTraceKernel_shadow_blocked_direct_lighting. */ + size_t global_size_shadow_blocked[2]; + global_size_shadow_blocked[0] = global_size[0] * 2; + global_size_shadow_blocked[1] = global_size[1]; + + /* Do path-iteration in host [Enqueue Path-iteration kernels. */ + for(int PathIter = 0; PathIter < 16; PathIter++) { + ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size); + ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); + + if(task->get_cancel()) { + return true; + } + } + + /* Decide if we should exit path-iteration in host. 
*/ + device->mem_copy_from(ray_state, 0, global_size[0] * global_size[1] * sizeof(char), 1, 1); + + activeRaysAvailable = false; + + for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) { + if(int8_t(ray_state.get_data()[rayStateIter]) != RAY_INACTIVE) { + /* Not all rays are RAY_INACTIVE. */ + activeRaysAvailable = true; + break; + } + } + + if(task->get_cancel()) { + return true; + } + } + + double time_per_sample = ((time_dt()-start_time) / subtile.num_samples); + + if(avg_time_per_sample == 0.0) { + /* start rolling average */ + avg_time_per_sample = time_per_sample; + } + else { + avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample; + } + +#undef ENQUEUE_SPLIT_KERNEL + + tile.sample += subtile.num_samples; + task->update_progress(&tile, tile.w*tile.h*subtile.num_samples); + + time_multiplier = min(time_multiplier << 1, 10); + + if(task->get_cancel()) { + return true; + } + } + + return true; +} + +CCL_NAMESPACE_END + + diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h new file mode 100644 index 00000000000..cc3e1aa26ae --- /dev/null +++ b/intern/cycles/device/device_split_kernel.h @@ -0,0 +1,127 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DEVICE_SPLIT_KERNEL_H__ +#define __DEVICE_SPLIT_KERNEL_H__ + +#include "device.h" +#include "buffers.h" + +CCL_NAMESPACE_BEGIN + +/* When allocate global memory in chunks. We may not be able to + * allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks; + * Since some bytes may be needed for aligning chunks of memory; + * This is the amount of memory that we dedicate for that purpose. + */ +#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB + +/* Types used for split kernel */ + +class KernelDimensions { +public: + size_t global_size[2]; + size_t local_size[2]; + + KernelDimensions(size_t global_size_[2], size_t local_size_[2]) + { + memcpy(global_size, global_size_, sizeof(global_size)); + memcpy(local_size, local_size_, sizeof(local_size)); + } +}; + +class SplitKernelFunction { +public: + virtual ~SplitKernelFunction() {} + + /* enqueue the kernel, returns false if there is an error */ + virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0; +}; + +class DeviceSplitKernel { +private: + Device *device; + + SplitKernelFunction *kernel_path_init; + SplitKernelFunction *kernel_scene_intersect; + SplitKernelFunction *kernel_lamp_emission; + SplitKernelFunction *kernel_queue_enqueue; + SplitKernelFunction *kernel_background_buffer_update; + SplitKernelFunction *kernel_shader_eval; + SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao; + SplitKernelFunction *kernel_direct_lighting; + SplitKernelFunction *kernel_shadow_blocked; + SplitKernelFunction *kernel_next_iteration_setup; + + /* Global memory variables [porting]; These memory is used for + * co-operation between different kernels; Data written by one + * kernel will be available to another kernel via this global + * memory. + */ + device_memory split_data; + device_vector<uchar> ray_state; + device_memory queue_index; /* Array of size num_queues * sizeof(int) that tracks the size of each queue. 
*/ + + /* Flag to make sceneintersect and lampemission kernel use queues. */ + device_memory use_queues_flag; + + /* Approximate time it takes to complete one sample */ + double avg_time_per_sample; + + /* Work pool with respect to each work group. */ + device_memory work_pool_wgs; + + /* clos_max value for which the kernels have been loaded currently. */ + int current_max_closure; + + /* Marked True in constructor and marked false at the end of path_trace(). */ + bool first_tile; + +public: + explicit DeviceSplitKernel(Device* device); + virtual ~DeviceSplitKernel(); + + bool load_kernels(const DeviceRequestedFeatures& requested_features); + bool path_trace(DeviceTask *task, + RenderTile& rtile, + device_memory& kgbuffer, + device_memory& kernel_data); + + virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0; + size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size); + + virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + device_memory& kernel_globals, + device_memory& kernel_data_, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flag, + device_memory& work_pool_wgs) = 0; + + virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&) = 0; + virtual int2 split_kernel_local_size() = 0; + virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0; +}; + +CCL_NAMESPACE_END + +#endif /* __DEVICE_SPLIT_KERNEL_H__ */ + + + diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 8bd54c3d2b0..f31092fd9d2 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -51,6 +51,8 @@ public: int shader_filter; int shader_x, shader_w; + int passes_size; + explicit DeviceTask(Type type = PATH_TRACE); 
int get_subtask_count(int num, int max_size = 0); diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 4023ba89a10..6470cb8ff7e 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -26,29 +26,29 @@ CCL_NAMESPACE_BEGIN -#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p)) - -/* Macro declarations used with split kernel */ - -/* Macro to enable/disable work-stealing */ -#define __WORK_STEALING__ - -#define SPLIT_KERNEL_LOCAL_SIZE_X 64 -#define SPLIT_KERNEL_LOCAL_SIZE_Y 1 - -/* This value may be tuned according to the scene we are rendering. - * - * Modifying PATH_ITER_INC_FACTOR value proportional to number of expected - * ray-bounces will improve performance. - */ -#define PATH_ITER_INC_FACTOR 8 +/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workaounds for testing */ +#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS +/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */ +# undef clEnqueueNDRangeKernel +# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \ + clFinish(a); \ + CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \ + clFinish(a); + +# undef clEnqueueWriteBuffer +# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \ + clFinish(a); \ + CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \ + clFinish(a); + +# undef clEnqueueReadBuffer +# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \ + clFinish(a); \ + CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \ + clFinish(a); +#endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */ -/* When allocate global memory in chunks. We may not be able to - * allocate exactly "CL_DEVICE_MAX_MEM_ALLOC_SIZE" bytes in chunks; - * Since some bytes may be needed for aligning chunks of memory; - * This is the amount of memory that we dedicate for that purpose. 
- */ -#define DATA_ALLOCATION_MEM_FACTOR 5000000 //5MB +#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p)) struct OpenCLPlatformDevice { OpenCLPlatformDevice(cl_platform_id platform_id, @@ -248,6 +248,7 @@ public: bool device_initialized; string platform_name; + string device_name; bool opencl_error(cl_int err); void opencl_error(const string& message); @@ -266,10 +267,10 @@ public: /* Has to be implemented by the real device classes. * The base device will then load all these programs. */ - virtual void load_kernels(const DeviceRequestedFeatures& requested_features, + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, vector<OpenCLProgram*> &programs) = 0; - void mem_alloc(device_memory& mem, MemoryType type); + void mem_alloc(const char *name, device_memory& mem, MemoryType type); void mem_copy_to(device_memory& mem); void mem_copy_from(device_memory& mem, int y, int w, int h, int elem); void mem_zero(device_memory& mem); @@ -326,16 +327,39 @@ protected: class ArgumentWrapper { public: - ArgumentWrapper() : size(0), pointer(NULL) {} - template <typename T> + ArgumentWrapper() : size(0), pointer(NULL) + { + } + + ArgumentWrapper(device_memory& argument) : size(sizeof(void*)), + pointer((void*)(&argument.device_pointer)) + { + } + + template<typename T> + ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)), + pointer((void*)(&argument.device_pointer)) + { + } + + template<typename T> ArgumentWrapper(T& argument) : size(sizeof(argument)), - pointer(&argument) { } + pointer(&argument) + { + } + ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), - pointer(&int_value) { } + pointer(&int_value) + { + } + ArgumentWrapper(float argument) : size(sizeof(float)), float_value(argument), - pointer(&float_value) { } + pointer(&float_value) + { + } + size_t size; int int_value; float float_value; diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 
a2b900312e7..c5f44f84e8c 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -82,9 +82,10 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou cpPlatform = platform_device.platform_id; cdDevice = platform_device.device_id; platform_name = platform_device.platform_name; + device_name = platform_device.device_name; VLOG(2) << "Creating new Cycles device for OpenCL platform " << platform_name << ", device " - << platform_device.device_name << "."; + << device_name << "."; { /* try to use cached context */ @@ -113,12 +114,16 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou } cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr); - if(opencl_error(ciErr)) + if(opencl_error(ciErr)) { + opencl_error("OpenCL: Error creating command queue"); return; + } null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr); - if(opencl_error(ciErr)) + if(opencl_error(ciErr)) { + opencl_error("OpenCL: Error creating memory buffer for NULL"); return; + } fprintf(stderr, "Device init success\n"); device_initialized = true; @@ -191,6 +196,8 @@ string OpenCLDeviceBase::device_md5_hash(string kernel_custom_build_options) bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_features) { + VLOG(2) << "Loading kernels for platform " << platform_name + << ", device " << device_name << "."; /* Verify if device was initialized. 
*/ if(!device_initialized) { fprintf(stderr, "OpenCL: failed to initialize device.\n"); @@ -206,11 +213,14 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea base_program.add_kernel(ustring("convert_to_half_float")); base_program.add_kernel(ustring("shader")); base_program.add_kernel(ustring("bake")); + base_program.add_kernel(ustring("zero_buffer")); vector<OpenCLProgram*> programs; programs.push_back(&base_program); /* Call actual class to fill the vector with its programs. */ - load_kernels(requested_features, programs); + if(!load_kernels(requested_features, programs)) { + return false; + } /* Parallel compilation is supported by Cycles, but currently all OpenCL frameworks * serialize the calls internally, so it's not much use right now. @@ -242,8 +252,14 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea return true; } -void OpenCLDeviceBase::mem_alloc(device_memory& mem, MemoryType type) +void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type) { + if(name) { + VLOG(1) << "Buffer allocate: " << name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. 
(" + << string_human_readable_size(mem.memory_size()) << ")"; + } + size_t size = mem.memory_size(); cl_mem_flags mem_flag; @@ -311,8 +327,61 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in void OpenCLDeviceBase::mem_zero(device_memory& mem) { if(mem.device_pointer) { - memset((void*)mem.data_pointer, 0, mem.memory_size()); - mem_copy_to(mem); + if(base_program.is_loaded()) { + cl_kernel ckZeroBuffer = base_program(ustring("zero_buffer")); + + size_t global_size[] = {1024, 1024}; + size_t num_threads = global_size[0] * global_size[1]; + + cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer); + unsigned long long d_offset = 0; + unsigned long long d_size = 0; + + while(d_offset < mem.memory_size()) { + d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset); + + kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset); + + ciErr = clEnqueueNDRangeKernel(cqCommandQueue, + ckZeroBuffer, + 2, + NULL, + global_size, + NULL, + 0, + NULL, + NULL); + opencl_assert_err(ciErr, "clEnqueueNDRangeKernel"); + + d_offset += d_size; + } + } + + if(mem.data_pointer) { + memset((void*)mem.data_pointer, 0, mem.memory_size()); + } + + if(!base_program.is_loaded()) { + void* zero = (void*)mem.data_pointer; + + if(!mem.data_pointer) { + zero = util_aligned_malloc(mem.memory_size(), 16); + memset(zero, 0, mem.memory_size()); + } + + opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, + CL_MEM_PTR(mem.device_pointer), + CL_TRUE, + 0, + mem.memory_size(), + zero, + 0, + NULL, NULL)); + + if(!mem.data_pointer) { + util_aligned_free(zero); + } + } } } @@ -337,7 +406,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size) device_vector<uchar> *data = new device_vector<uchar>(); data->copy((uchar*)host, size); - mem_alloc(*data, MEM_READ_ONLY); + mem_alloc(name, *data, MEM_READ_ONLY); i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first; } else { @@ -356,7 +425,7 @@ void 
OpenCLDeviceBase::tex_alloc(const char *name, VLOG(1) << "Texture allocate: " << name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - mem_alloc(mem, MEM_READ_ONLY); + mem_alloc(NULL, mem, MEM_READ_ONLY); mem_copy_to(mem); assert(mem_map.find(name) == mem_map.end()); mem_map.insert(MemMap::value_type(name, mem.device_pointer)); diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index 6ea7619e022..049e332272b 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -43,11 +43,12 @@ public: return true; } - virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/, + virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/, vector<OpenCLProgram*> &programs) { path_trace_program.add_kernel(ustring("path_trace")); programs.push_back(&path_trace_program); + return true; } ~OpenCLDeviceMegaKernel() diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 3c3c2150128..b651b4a848e 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -21,325 +21,48 @@ #include "buffers.h" #include "kernel_types.h" +#include "kernel_split_data_types.h" +#include "device_split_kernel.h" + +#include "util_logging.h" #include "util_md5.h" #include "util_path.h" #include "util_time.h" CCL_NAMESPACE_BEGIN -/* TODO(sergey): This is to keep tile split on OpenCL level working - * for now, since without this view-port render does not work as it - * should. - * - * Ideally it'll be done on the higher level, but we need to get ready - * for merge rather soon, so let's keep split logic private here in - * the file. 
- */ -class SplitRenderTile : public RenderTile { -public: - SplitRenderTile() - : RenderTile(), - buffer_offset_x(0), - buffer_offset_y(0), - rng_state_offset_x(0), - rng_state_offset_y(0), - buffer_rng_state_stride(0) {} - - explicit SplitRenderTile(RenderTile& tile) - : RenderTile(), - buffer_offset_x(0), - buffer_offset_y(0), - rng_state_offset_x(0), - rng_state_offset_y(0), - buffer_rng_state_stride(0) - { - x = tile.x; - y = tile.y; - w = tile.w; - h = tile.h; - start_sample = tile.start_sample; - num_samples = tile.num_samples; - sample = tile.sample; - resolution = tile.resolution; - offset = tile.offset; - stride = tile.stride; - buffer = tile.buffer; - rng_state = tile.rng_state; - buffers = tile.buffers; +class OpenCLSplitKernel; + +static string get_build_options(OpenCLDeviceBase *device, const DeviceRequestedFeatures& requested_features) +{ + string build_options = "-D__SPLIT_KERNEL__ "; + build_options += requested_features.get_build_options(); + + /* Set compute device build option. */ + cl_device_type device_type; + device->ciErr = clGetDeviceInfo(device->cdDevice, + CL_DEVICE_TYPE, + sizeof(cl_device_type), + &device_type, + NULL); + assert(device->ciErr == CL_SUCCESS); + if(device_type == CL_DEVICE_TYPE_GPU) { + build_options += " -D__COMPUTE_DEVICE_GPU__"; } - /* Split kernel is device global memory constrained; - * hence split kernel cant render big tile size's in - * one go. If the user sets a big tile size (big tile size - * is a term relative to the available device global memory), - * we split the tile further and then call path_trace on - * each of those split tiles. The following variables declared, - * assist in achieving that purpose - */ - int buffer_offset_x; - int buffer_offset_y; - int rng_state_offset_x; - int rng_state_offset_y; - int buffer_rng_state_stride; -}; + return build_options; +} /* OpenCLDeviceSplitKernel's declaration/definition. 
*/ class OpenCLDeviceSplitKernel : public OpenCLDeviceBase { public: - /* Kernel declaration. */ + DeviceSplitKernel *split_kernel; OpenCLProgram program_data_init; - OpenCLProgram program_scene_intersect; - OpenCLProgram program_lamp_emission; - OpenCLProgram program_queue_enqueue; - OpenCLProgram program_background_buffer_update; - OpenCLProgram program_shader_eval; - OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; - OpenCLProgram program_direct_lighting; - OpenCLProgram program_shadow_blocked; - OpenCLProgram program_next_iteration_setup; - OpenCLProgram program_sum_all_radiance; - - /* Global memory variables [porting]; These memory is used for - * co-operation between different kernels; Data written by one - * kernel will be available to another kernel via this global - * memory. - */ - cl_mem rng_coop; - cl_mem throughput_coop; - cl_mem L_transparent_coop; - cl_mem PathRadiance_coop; - cl_mem Ray_coop; - cl_mem PathState_coop; - cl_mem Intersection_coop; - cl_mem kgbuffer; /* KernelGlobals buffer. */ - - /* Global buffers for ShaderData. */ - cl_mem sd; /* ShaderData used in the main path-iteration loop. */ - cl_mem sd_DL_shadow; /* ShaderData used in Direct Lighting and - * shadow_blocked kernel. - */ - - /* Global memory required for shadow blocked and accum_radiance. */ - cl_mem BSDFEval_coop; - cl_mem ISLamp_coop; - cl_mem LightRay_coop; - cl_mem AOAlpha_coop; - cl_mem AOBSDF_coop; - cl_mem AOLightRay_coop; - cl_mem Intersection_coop_shadow; - -#ifdef WITH_CYCLES_DEBUG - /* DebugData memory */ - cl_mem debugdata_coop; -#endif - - /* Global state array that tracks ray state. */ - cl_mem ray_state; - - /* Per sample buffers. */ - cl_mem per_sample_output_buffers; - - /* Denotes which sample each ray is being processed for. */ - cl_mem work_array; - - /* Queue */ - cl_mem Queue_data; /* Array of size queuesize * num_queues * sizeof(int). */ - cl_mem Queue_index; /* Array of size num_queues * sizeof(int); - * Tracks the size of each queue. 
- */ - - /* Flag to make sceneintersect and lampemission kernel use queues. */ - cl_mem use_queues_flag; - - /* Amount of memory in output buffer associated with one pixel/thread. */ - size_t per_thread_output_buffer_size; - - /* Total allocatable available device memory. */ - size_t total_allocatable_memory; - - /* host version of ray_state; Used in checking host path-iteration - * termination. - */ - char *hostRayStateArray; - - /* Number of path-iterations to be done in one shot. */ - unsigned int PathIteration_times; - -#ifdef __WORK_STEALING__ - /* Work pool with respect to each work group. */ - cl_mem work_pool_wgs; - - /* Denotes the maximum work groups possible w.r.t. current tile size. */ - unsigned int max_work_groups; -#endif - - /* clos_max value for which the kernels have been loaded currently. */ - int current_max_closure; - - /* Marked True in constructor and marked false at the end of path_trace(). */ - bool first_tile; - - OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_) - : OpenCLDeviceBase(info, stats, background_) - { - background = background_; - - /* Initialize cl_mem variables. */ - kgbuffer = NULL; - sd = NULL; - sd_DL_shadow = NULL; - - rng_coop = NULL; - throughput_coop = NULL; - L_transparent_coop = NULL; - PathRadiance_coop = NULL; - Ray_coop = NULL; - PathState_coop = NULL; - Intersection_coop = NULL; - ray_state = NULL; - - AOAlpha_coop = NULL; - AOBSDF_coop = NULL; - AOLightRay_coop = NULL; - BSDFEval_coop = NULL; - ISLamp_coop = NULL; - LightRay_coop = NULL; - Intersection_coop_shadow = NULL; - -#ifdef WITH_CYCLES_DEBUG - debugdata_coop = NULL; -#endif - - work_array = NULL; - - /* Queue. 
*/ - Queue_data = NULL; - Queue_index = NULL; - use_queues_flag = NULL; - - per_sample_output_buffers = NULL; - - per_thread_output_buffer_size = 0; - hostRayStateArray = NULL; - PathIteration_times = PATH_ITER_INC_FACTOR; -#ifdef __WORK_STEALING__ - work_pool_wgs = NULL; - max_work_groups = 0; -#endif - current_max_closure = -1; - first_tile = true; - - /* Get device's maximum memory that can be allocated. */ - ciErr = clGetDeviceInfo(cdDevice, - CL_DEVICE_MAX_MEM_ALLOC_SIZE, - sizeof(size_t), - &total_allocatable_memory, - NULL); - assert(ciErr == CL_SUCCESS); - if(platform_name == "AMD Accelerated Parallel Processing") { - /* This value is tweak-able; AMD platform does not seem to - * give maximum performance when all of CL_DEVICE_MAX_MEM_ALLOC_SIZE - * is considered for further computation. - */ - total_allocatable_memory /= 2; - } - } - - virtual bool show_samples() const { - return false; - } - - /* Split kernel utility functions. */ - size_t get_tex_size(const char *tex_name) - { - cl_mem ptr; - size_t ret_size = 0; - MemMap::iterator i = mem_map.find(tex_name); - if(i != mem_map.end()) { - ptr = CL_MEM_PTR(i->second); - ciErr = clGetMemObjectInfo(ptr, - CL_MEM_SIZE, - sizeof(ret_size), - &ret_size, - NULL); - assert(ciErr == CL_SUCCESS); - } - return ret_size; - } - - size_t get_shader_data_size(size_t max_closure) - { - /* ShaderData size with variable size ShaderClosure array */ - return sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - max_closure)); - } - - /* Returns size of KernelGlobals structure associated with OpenCL. */ - size_t get_KernelGlobals_size() - { - /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to - * fetch its size. 
- */ - typedef struct KernelGlobals { - ccl_constant KernelData *data; -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name; -#include "kernel_textures.h" -#undef KERNEL_TEX - void *sd_input; - void *isect_shadow; - } KernelGlobals; - - return sizeof(KernelGlobals); - } + OpenCLProgram program_state_buffer_size; - virtual void load_kernels(const DeviceRequestedFeatures& requested_features, - vector<OpenCLProgram*> &programs) - { - string build_options = "-D__SPLIT_KERNEL__ "; -#ifdef __WORK_STEALING__ - build_options += "-D__WORK_STEALING__ "; -#endif - build_options += requested_features.get_build_options(); - - /* Set compute device build option. */ - cl_device_type device_type; - ciErr = clGetDeviceInfo(cdDevice, - CL_DEVICE_TYPE, - sizeof(cl_device_type), - &device_type, - NULL); - assert(ciErr == CL_SUCCESS); - if(device_type == CL_DEVICE_TYPE_GPU) { - build_options += " -D__COMPUTE_DEVICE_GPU__"; - } - -#define GLUE(a, b) a ## b -#define LOAD_KERNEL(name) \ - do { \ - GLUE(program_, name) = OpenCLProgram(this, "split_" #name, "kernel_" #name ".cl", build_options); \ - GLUE(program_, name).add_kernel(ustring("path_trace_" #name)); \ - programs.push_back(&GLUE(program_, name)); \ - } while(false) - - LOAD_KERNEL(data_init); - LOAD_KERNEL(scene_intersect); - LOAD_KERNEL(lamp_emission); - LOAD_KERNEL(queue_enqueue); - LOAD_KERNEL(background_buffer_update); - LOAD_KERNEL(shader_eval); - LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao); - LOAD_KERNEL(direct_lighting); - LOAD_KERNEL(shadow_blocked); - LOAD_KERNEL(next_iteration_setup); - LOAD_KERNEL(sum_all_radiance); - -#undef FIND_KERNEL -#undef GLUE - - current_max_closure = requested_features.max_closure; - } + OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_); ~OpenCLDeviceSplitKernel() { @@ -347,960 +70,298 @@ public: /* Release kernels */ program_data_init.release(); - program_scene_intersect.release(); - program_lamp_emission.release(); - 
program_queue_enqueue.release(); - program_background_buffer_update.release(); - program_shader_eval.release(); - program_holdout_emission_blurring_pathtermination_ao.release(); - program_direct_lighting.release(); - program_shadow_blocked.release(); - program_next_iteration_setup.release(); - program_sum_all_radiance.release(); - - /* Release global memory */ - release_mem_object_safe(rng_coop); - release_mem_object_safe(throughput_coop); - release_mem_object_safe(L_transparent_coop); - release_mem_object_safe(PathRadiance_coop); - release_mem_object_safe(Ray_coop); - release_mem_object_safe(PathState_coop); - release_mem_object_safe(Intersection_coop); - release_mem_object_safe(kgbuffer); - release_mem_object_safe(sd); - release_mem_object_safe(sd_DL_shadow); - release_mem_object_safe(ray_state); - release_mem_object_safe(AOAlpha_coop); - release_mem_object_safe(AOBSDF_coop); - release_mem_object_safe(AOLightRay_coop); - release_mem_object_safe(BSDFEval_coop); - release_mem_object_safe(ISLamp_coop); - release_mem_object_safe(LightRay_coop); - release_mem_object_safe(Intersection_coop_shadow); -#ifdef WITH_CYCLES_DEBUG - release_mem_object_safe(debugdata_coop); -#endif - release_mem_object_safe(use_queues_flag); - release_mem_object_safe(Queue_data); - release_mem_object_safe(Queue_index); - release_mem_object_safe(work_array); -#ifdef __WORK_STEALING__ - release_mem_object_safe(work_pool_wgs); -#endif - release_mem_object_safe(per_sample_output_buffers); - - if(hostRayStateArray != NULL) { - free(hostRayStateArray); - } + + delete split_kernel; } - void path_trace(DeviceTask *task, - SplitRenderTile& rtile, - int2 max_render_feasible_tile_size) + virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, + vector<OpenCLDeviceBase::OpenCLProgram*> &programs) { - /* cast arguments to cl types */ - cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); - cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); - cl_mem d_rng_state = 
CL_MEM_PTR(rtile.rng_state); - cl_int d_x = rtile.x; - cl_int d_y = rtile.y; - cl_int d_w = rtile.w; - cl_int d_h = rtile.h; - cl_int d_offset = rtile.offset; - cl_int d_stride = rtile.stride; - - /* Make sure that set render feasible tile size is a multiple of local - * work size dimensions. - */ - assert(max_render_feasible_tile_size.x % SPLIT_KERNEL_LOCAL_SIZE_X == 0); - assert(max_render_feasible_tile_size.y % SPLIT_KERNEL_LOCAL_SIZE_Y == 0); - - size_t global_size[2]; - size_t local_size[2] = {SPLIT_KERNEL_LOCAL_SIZE_X, - SPLIT_KERNEL_LOCAL_SIZE_Y}; + program_data_init = OpenCLDeviceBase::OpenCLProgram(this, + "split_data_init", + "kernel_data_init.cl", + get_build_options(this, requested_features)); + program_data_init.add_kernel(ustring("path_trace_data_init")); + programs.push_back(&program_data_init); + + program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this, + "split_state_buffer_size", + "kernel_state_buffer_size.cl", + get_build_options(this, requested_features)); + program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size")); + programs.push_back(&program_state_buffer_size); + + return split_kernel->load_kernels(requested_features); + } - /* Set the range of samples to be processed for every ray in - * path-regeneration logic. - */ - cl_int start_sample = rtile.start_sample; - cl_int end_sample = rtile.start_sample + rtile.num_samples; - cl_int num_samples = rtile.num_samples; - -#ifdef __WORK_STEALING__ - global_size[0] = (((d_w - 1) / local_size[0]) + 1) * local_size[0]; - global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1]; - unsigned int num_parallel_samples = 1; -#else - global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1]; - unsigned int num_threads = max_render_feasible_tile_size.x * - max_render_feasible_tile_size.y; - unsigned int num_tile_columns_possible = num_threads / global_size[1]; - /* Estimate number of parallel samples that can be - * processed in parallel. 
- */ - unsigned int num_parallel_samples = min(num_tile_columns_possible / d_w, - rtile.num_samples); - /* Wavefront size in AMD is 64. - * TODO(sergey): What about other platforms? - */ - if(num_parallel_samples >= 64) { - /* TODO(sergey): Could use generic round-up here. */ - num_parallel_samples = (num_parallel_samples / 64) * 64; + void thread_run(DeviceTask *task) + { + if(task->type == DeviceTask::FILM_CONVERT) { + film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); } - assert(num_parallel_samples != 0); - - global_size[0] = d_w * num_parallel_samples; -#endif /* __WORK_STEALING__ */ - - assert(global_size[0] * global_size[1] <= - max_render_feasible_tile_size.x * max_render_feasible_tile_size.y); - - /* Allocate all required global memory once. */ - if(first_tile) { - size_t num_global_elements = max_render_feasible_tile_size.x * - max_render_feasible_tile_size.y; - /* TODO(sergey): This will actually over-allocate if - * particular kernel does not support multiclosure. - */ - size_t shaderdata_size = get_shader_data_size(current_max_closure); - -#ifdef __WORK_STEALING__ - /* Calculate max groups */ - size_t max_global_size[2]; - size_t tile_x = max_render_feasible_tile_size.x; - size_t tile_y = max_render_feasible_tile_size.y; - max_global_size[0] = (((tile_x - 1) / local_size[0]) + 1) * local_size[0]; - max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * local_size[1]; - max_work_groups = (max_global_size[0] * max_global_size[1]) / - (local_size[0] * local_size[1]); - /* Allocate work_pool_wgs memory. */ - work_pool_wgs = mem_alloc(max_work_groups * sizeof(unsigned int)); -#endif /* __WORK_STEALING__ */ - - /* Allocate queue_index memory only once. */ - Queue_index = mem_alloc(NUM_QUEUES * sizeof(int)); - use_queues_flag = mem_alloc(sizeof(char)); - kgbuffer = mem_alloc(get_KernelGlobals_size()); - - /* Create global buffers for ShaderData. 
*/ - sd = mem_alloc(num_global_elements * shaderdata_size); - sd_DL_shadow = mem_alloc(num_global_elements * 2 * shaderdata_size); - - /* Creation of global memory buffers which are shared among - * the kernels. - */ - rng_coop = mem_alloc(num_global_elements * sizeof(RNG)); - throughput_coop = mem_alloc(num_global_elements * sizeof(float3)); - L_transparent_coop = mem_alloc(num_global_elements * sizeof(float)); - PathRadiance_coop = mem_alloc(num_global_elements * sizeof(PathRadiance)); - Ray_coop = mem_alloc(num_global_elements * sizeof(Ray)); - PathState_coop = mem_alloc(num_global_elements * sizeof(PathState)); - Intersection_coop = mem_alloc(num_global_elements * sizeof(Intersection)); - AOAlpha_coop = mem_alloc(num_global_elements * sizeof(float3)); - AOBSDF_coop = mem_alloc(num_global_elements * sizeof(float3)); - AOLightRay_coop = mem_alloc(num_global_elements * sizeof(Ray)); - BSDFEval_coop = mem_alloc(num_global_elements * sizeof(BsdfEval)); - ISLamp_coop = mem_alloc(num_global_elements * sizeof(int)); - LightRay_coop = mem_alloc(num_global_elements * sizeof(Ray)); - Intersection_coop_shadow = mem_alloc(2 * num_global_elements * sizeof(Intersection)); - -#ifdef WITH_CYCLES_DEBUG - debugdata_coop = mem_alloc(num_global_elements * sizeof(DebugData)); -#endif - - ray_state = mem_alloc(num_global_elements * sizeof(char)); - - hostRayStateArray = (char *)calloc(num_global_elements, sizeof(char)); - assert(hostRayStateArray != NULL && "Can't create hostRayStateArray memory"); - - Queue_data = mem_alloc(num_global_elements * (NUM_QUEUES * sizeof(int)+sizeof(int))); - work_array = mem_alloc(num_global_elements * sizeof(unsigned int)); - per_sample_output_buffers = mem_alloc(num_global_elements * - per_thread_output_buffer_size); + else if(task->type == DeviceTask::SHADER) { + shader(*task); } + else if(task->type == DeviceTask::PATH_TRACE) { + RenderTile tile; - cl_int dQueue_size = global_size[0] * global_size[1]; - - cl_uint start_arg_index = - 
kernel_set_args(program_data_init(), - 0, - kgbuffer, - sd_DL_shadow, - d_data, - per_sample_output_buffers, - d_rng_state, - rng_coop, - throughput_coop, - L_transparent_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - Intersection_coop_shadow, - ray_state); - -/* TODO(sergey): Avoid map lookup here. */ + /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to + * fetch its size. + */ + typedef struct KernelGlobals { + ccl_constant KernelData *data; #define KERNEL_TEX(type, ttype, name) \ - set_kernel_arg_mem(program_data_init(), &start_arg_index, #name); + ccl_global type *name; #include "kernel_textures.h" #undef KERNEL_TEX + SplitData split_data; + SplitParams split_param_data; + } KernelGlobals; - start_arg_index += - kernel_set_args(program_data_init(), - start_arg_index, - start_sample, - d_x, - d_y, - d_w, - d_h, - d_offset, - d_stride, - rtile.rng_state_offset_x, - rtile.rng_state_offset_y, - rtile.buffer_rng_state_stride, - Queue_data, - Queue_index, - dQueue_size, - use_queues_flag, - work_array, -#ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, -#endif -#ifdef WITH_CYCLES_DEBUG - debugdata_coop, -#endif - num_parallel_samples); - - kernel_set_args(program_scene_intersect(), - 0, - kgbuffer, - d_data, - rng_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - d_w, - d_h, - Queue_data, - Queue_index, - dQueue_size, - use_queues_flag, -#ifdef WITH_CYCLES_DEBUG - debugdata_coop, -#endif - num_parallel_samples); - - kernel_set_args(program_lamp_emission(), - 0, - kgbuffer, - d_data, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - d_w, - d_h, - Queue_data, - Queue_index, - dQueue_size, - use_queues_flag, - num_parallel_samples); - - kernel_set_args(program_queue_enqueue(), - 0, - Queue_data, - Queue_index, - ray_state, - dQueue_size); - - kernel_set_args(program_background_buffer_update(), - 0, - kgbuffer, - d_data, - per_sample_output_buffers, - 
d_rng_state, - rng_coop, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - L_transparent_coop, - ray_state, - d_w, - d_h, - d_x, - d_y, - d_stride, - rtile.rng_state_offset_x, - rtile.rng_state_offset_y, - rtile.buffer_rng_state_stride, - work_array, - Queue_data, - Queue_index, - dQueue_size, - end_sample, - start_sample, -#ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, -#endif -#ifdef WITH_CYCLES_DEBUG - debugdata_coop, -#endif - num_parallel_samples); - - kernel_set_args(program_shader_eval(), - 0, - kgbuffer, - d_data, - sd, - rng_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - Queue_data, - Queue_index, - dQueue_size); - - kernel_set_args(program_holdout_emission_blurring_pathtermination_ao(), - 0, - kgbuffer, - d_data, - sd, - per_sample_output_buffers, - rng_coop, - throughput_coop, - L_transparent_coop, - PathRadiance_coop, - PathState_coop, - Intersection_coop, - AOAlpha_coop, - AOBSDF_coop, - AOLightRay_coop, - d_w, - d_h, - d_x, - d_y, - d_stride, - ray_state, - work_array, - Queue_data, - Queue_index, - dQueue_size, -#ifdef __WORK_STEALING__ - start_sample, -#endif - num_parallel_samples); - - kernel_set_args(program_direct_lighting(), - 0, - kgbuffer, - d_data, - sd, - rng_coop, - PathState_coop, - ISLamp_coop, - LightRay_coop, - BSDFEval_coop, - ray_state, - Queue_data, - Queue_index, - dQueue_size); - - kernel_set_args(program_shadow_blocked(), - 0, - kgbuffer, - d_data, - PathState_coop, - LightRay_coop, - AOLightRay_coop, - ray_state, - Queue_data, - Queue_index, - dQueue_size); - - kernel_set_args(program_next_iteration_setup(), - 0, - kgbuffer, - d_data, - sd, - rng_coop, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - LightRay_coop, - ISLamp_coop, - BSDFEval_coop, - AOLightRay_coop, - AOBSDF_coop, - AOAlpha_coop, - ray_state, - Queue_data, - Queue_index, - dQueue_size, - use_queues_flag); - - kernel_set_args(program_sum_all_radiance(), - 0, - d_data, - d_buffer, - 
per_sample_output_buffers, - num_parallel_samples, - d_w, - d_h, - d_stride, - rtile.buffer_offset_x, - rtile.buffer_offset_y, - rtile.buffer_rng_state_stride, - start_sample); - - /* Macro for Enqueuing split kernels. */ -#define GLUE(a, b) a ## b -#define ENQUEUE_SPLIT_KERNEL(kernelName, globalSize, localSize) \ - { \ - ciErr = clEnqueueNDRangeKernel(cqCommandQueue, \ - GLUE(program_, \ - kernelName)(), \ - 2, \ - NULL, \ - globalSize, \ - localSize, \ - 0, \ - NULL, \ - NULL); \ - opencl_assert_err(ciErr, "clEnqueueNDRangeKernel"); \ - if(ciErr != CL_SUCCESS) { \ - string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", \ - clewErrorString(ciErr)); \ - opencl_error(message); \ - return; \ - } \ - } (void) 0 - - /* Enqueue ckPathTraceKernel_data_init kernel. */ - ENQUEUE_SPLIT_KERNEL(data_init, global_size, local_size); - bool activeRaysAvailable = true; - - /* Record number of time host intervention has been made */ - unsigned int numHostIntervention = 0; - unsigned int numNextPathIterTimes = PathIteration_times; - bool canceled = false; - while(activeRaysAvailable) { - /* Twice the global work size of other kernels for - * ckPathTraceKernel_shadow_blocked_direct_lighting. */ - size_t global_size_shadow_blocked[2]; - global_size_shadow_blocked[0] = global_size[0] * 2; - global_size_shadow_blocked[1] = global_size[1]; - - /* Do path-iteration in host [Enqueue Path-iteration kernels. 
*/ - for(int PathIter = 0; PathIter < PathIteration_times; PathIter++) { - ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size); - ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); - - if(task->get_cancel()) { - canceled = true; - break; - } - } + /* Allocate buffer for kernel globals */ + device_memory kgbuffer; + kgbuffer.resize(sizeof(KernelGlobals)); + mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); - /* Read ray-state into Host memory to decide if we should exit - * path-iteration in host. - */ - ciErr = clEnqueueReadBuffer(cqCommandQueue, - ray_state, - CL_TRUE, - 0, - global_size[0] * global_size[1] * sizeof(char), - hostRayStateArray, - 0, - NULL, - NULL); - assert(ciErr == CL_SUCCESS); - - activeRaysAvailable = false; - - for(int rayStateIter = 0; - rayStateIter < global_size[0] * global_size[1]; - ++rayStateIter) - { - if(int8_t(hostRayStateArray[rayStateIter]) != RAY_INACTIVE) { - /* Not all rays are RAY_INACTIVE. */ - activeRaysAvailable = true; - break; - } - } + /* Keep rendering tiles until done. */ + while(task->acquire_tile(this, tile)) { + split_kernel->path_trace(task, + tile, + kgbuffer, + *const_mem_map["__data"]); - if(activeRaysAvailable) { - numHostIntervention++; - PathIteration_times = PATH_ITER_INC_FACTOR; - /* Host intervention done before all rays become RAY_INACTIVE; - * Set do more initial iterations for the next tile. + /* Complete kernel execution before release tile. 
*/ + /* This helps in multi-device render; + * The device that reaches the critical-section function + * release_tile waits (stalling other devices from entering + * release_tile) for all kernels to complete. If device1 (a + * slow-render device) reaches release_tile first then it would + * stall device2 (a fast-render device) from proceeding to render + * next tile. */ - numNextPathIterTimes += PATH_ITER_INC_FACTOR; - } + clFinish(cqCommandQueue); - if(task->get_cancel()) { - canceled = true; - break; + task->release_tile(tile); } - } - /* Execute SumALLRadiance kernel to accumulate radiance calculated in - * per_sample_output_buffers into RenderTile's output buffer. - */ - if(!canceled) { - size_t sum_all_radiance_local_size[2] = {16, 16}; - size_t sum_all_radiance_global_size[2]; - sum_all_radiance_global_size[0] = - (((d_w - 1) / sum_all_radiance_local_size[0]) + 1) * - sum_all_radiance_local_size[0]; - sum_all_radiance_global_size[1] = - (((d_h - 1) / sum_all_radiance_local_size[1]) + 1) * - sum_all_radiance_local_size[1]; - ENQUEUE_SPLIT_KERNEL(sum_all_radiance, - sum_all_radiance_global_size, - sum_all_radiance_local_size); - } - -#undef ENQUEUE_SPLIT_KERNEL -#undef GLUE - - if(numHostIntervention == 0) { - /* This means that we are executing kernel more than required - * Must avoid this for the next sample/tile. - */ - PathIteration_times = ((numNextPathIterTimes - PATH_ITER_INC_FACTOR) <= 0) ? - PATH_ITER_INC_FACTOR : numNextPathIterTimes - PATH_ITER_INC_FACTOR; + mem_free(kgbuffer); } - else { - /* Number of path-iterations done for this tile is set as - * Initial path-iteration times for the next tile - */ - PathIteration_times = numNextPathIterTimes; - } - - first_tile = false; } - /* Calculates the amount of memory that has to be always - * allocated in order for the split kernel to function. 
- * This memory is tile/scene-property invariant (meaning, - * the value returned by this function does not depend - * on the user set tile size or scene properties. - */ - size_t get_invariable_mem_allocated() - { - size_t total_invariable_mem_allocated = 0; - size_t KernelGlobals_size = 0; - - KernelGlobals_size = get_KernelGlobals_size(); - - total_invariable_mem_allocated += KernelGlobals_size; /* KernelGlobals size */ - total_invariable_mem_allocated += NUM_QUEUES * sizeof(unsigned int); /* Queue index size */ - total_invariable_mem_allocated += sizeof(char); /* use_queues_flag size */ - - return total_invariable_mem_allocated; - } +protected: + /* ** Those guys are for working around some compiler-specific bugs ** */ - /* Calculate the memory that has-to-be/has-been allocated for - * the split kernel to function. - */ - size_t get_tile_specific_mem_allocated(const int2 tile_size) + string build_options_for_base_program( + const DeviceRequestedFeatures& requested_features) - { - size_t tile_specific_mem_allocated = 0; - - /* Get required tile info */ - unsigned int user_set_tile_w = tile_size.x; - unsigned int user_set_tile_h = tile_size.y; - -#ifdef __WORK_STEALING__ - /* Calculate memory to be allocated for work_pools in - * case of work_stealing. 
- */ - size_t max_global_size[2]; - size_t max_num_work_pools = 0; - max_global_size[0] = - (((user_set_tile_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_X; - max_global_size[1] = - (((user_set_tile_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - max_num_work_pools = - (max_global_size[0] * max_global_size[1]) / - (SPLIT_KERNEL_LOCAL_SIZE_X * SPLIT_KERNEL_LOCAL_SIZE_Y); - tile_specific_mem_allocated += max_num_work_pools * sizeof(unsigned int); -#endif - - tile_specific_mem_allocated += - user_set_tile_w * user_set_tile_h * per_thread_output_buffer_size; - tile_specific_mem_allocated += - user_set_tile_w * user_set_tile_h * sizeof(RNG); - - return tile_specific_mem_allocated; + return requested_features.get_build_options(); } - /* Calculates the texture memories and KernelData (d_data) memory - * that has been allocated. - */ - size_t get_scene_specific_mem_allocated(cl_mem d_data) - { - size_t scene_specific_mem_allocated = 0; - /* Calculate texture memories. */ -#define KERNEL_TEX(type, ttype, name) \ - scene_specific_mem_allocated += get_tex_size(#name); -#include "kernel_textures.h" -#undef KERNEL_TEX - size_t d_data_size; - ciErr = clGetMemObjectInfo(d_data, - CL_MEM_SIZE, - sizeof(d_data_size), - &d_data_size, - NULL); - assert(ciErr == CL_SUCCESS && "Can't get d_data mem object info"); - scene_specific_mem_allocated += d_data_size; - return scene_specific_mem_allocated; - } + friend class OpenCLSplitKernel; + friend class OpenCLSplitKernelFunction; +}; - /* Calculate the memory required for one thread in split kernel. */ - size_t get_per_thread_memory() - { - size_t shaderdata_size = 0; - /* TODO(sergey): This will actually over-allocate if - * particular kernel does not support multiclosure. 
- */ - shaderdata_size = get_shader_data_size(current_max_closure); - size_t retval = sizeof(RNG) - + sizeof(float3) /* Throughput size */ - + sizeof(float) /* L transparent size */ - + sizeof(char) /* Ray state size */ - + sizeof(unsigned int) /* Work element size */ - + sizeof(int) /* ISLamp_size */ - + sizeof(PathRadiance) + sizeof(Ray) + sizeof(PathState) - + sizeof(Intersection) /* Overall isect */ - + sizeof(Intersection) /* Instersection_coop_AO */ - + sizeof(Intersection) /* Intersection coop DL */ - + shaderdata_size /* Overall ShaderData */ - + (shaderdata_size * 2) /* ShaderData : DL and shadow */ - + sizeof(Ray) + sizeof(BsdfEval) - + sizeof(float3) /* AOAlpha size */ - + sizeof(float3) /* AOBSDF size */ - + sizeof(Ray) - + (sizeof(int) * NUM_QUEUES) - + per_thread_output_buffer_size; - return retval; - } +class OpenCLSplitKernelFunction : public SplitKernelFunction { +public: + OpenCLDeviceSplitKernel* device; + OpenCLDeviceBase::OpenCLProgram program; - /* Considers the total memory available in the device and - * and returns the maximum global work size possible. - */ - size_t get_feasible_global_work_size(int2 tile_size, cl_mem d_data) - { - /* Calculate invariably allocated memory. */ - size_t invariable_mem_allocated = get_invariable_mem_allocated(); - /* Calculate tile specific allocated memory. */ - size_t tile_specific_mem_allocated = - get_tile_specific_mem_allocated(tile_size); - /* Calculate scene specific allocated memory. */ - size_t scene_specific_mem_allocated = - get_scene_specific_mem_allocated(d_data); - /* Calculate total memory available for the threads in global work size. 
*/ - size_t available_memory = total_allocatable_memory - - invariable_mem_allocated - - tile_specific_mem_allocated - - scene_specific_mem_allocated - - DATA_ALLOCATION_MEM_FACTOR; - size_t per_thread_memory_required = get_per_thread_memory(); - return (available_memory / per_thread_memory_required); - } + OpenCLSplitKernelFunction(OpenCLDeviceSplitKernel* device) : device(device) {} + ~OpenCLSplitKernelFunction() { program.release(); } - /* Checks if the device has enough memory to render the whole tile; - * If not, we should split single tile into multiple tiles of small size - * and process them all. - */ - bool need_to_split_tile(unsigned int d_w, - unsigned int d_h, - int2 max_render_feasible_tile_size) + virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { - size_t global_size_estimate[2]; - /* TODO(sergey): Such round-ups are in quite few places, need to replace - * them with an utility macro. - */ - global_size_estimate[0] = - (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_X; - global_size_estimate[1] = - (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - if((global_size_estimate[0] * global_size_estimate[1]) > - (max_render_feasible_tile_size.x * max_render_feasible_tile_size.y)) - { - return true; - } - else { + device->kernel_set_args(program(), 0, kg, data); + + device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, + program(), + 2, + NULL, + dim.global_size, + dim.local_size, + 0, + NULL, + NULL); + + device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); + + if(device->ciErr != CL_SUCCESS) { + string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", + clewErrorString(device->ciErr)); + device->opencl_error(message); return false; } + + return true; } +}; - /* Considers the scene properties, global memory available in the device - * and returns a rectanglular tile dimension (approx the maximum) - * that should 
render on split kernel. - */ - int2 get_max_render_feasible_tile_size(size_t feasible_global_work_size) - { - int2 max_render_feasible_tile_size; - int square_root_val = (int)sqrt(feasible_global_work_size); - max_render_feasible_tile_size.x = square_root_val; - max_render_feasible_tile_size.y = square_root_val; - /* Ciel round-off max_render_feasible_tile_size. */ - int2 ceil_render_feasible_tile_size; - ceil_render_feasible_tile_size.x = - (((max_render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_X; - ceil_render_feasible_tile_size.y = - (((max_render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - if(ceil_render_feasible_tile_size.x * ceil_render_feasible_tile_size.y <= - feasible_global_work_size) - { - return ceil_render_feasible_tile_size; - } - /* Floor round-off max_render_feasible_tile_size. */ - int2 floor_render_feasible_tile_size; - floor_render_feasible_tile_size.x = - (max_render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) * - SPLIT_KERNEL_LOCAL_SIZE_X; - floor_render_feasible_tile_size.y = - (max_render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - return floor_render_feasible_tile_size; +class OpenCLSplitKernel : public DeviceSplitKernel { + OpenCLDeviceSplitKernel *device; +public: + explicit OpenCLSplitKernel(OpenCLDeviceSplitKernel *device) : DeviceSplitKernel(device), device(device) { } - /* Try splitting the current tile into multiple smaller - * almost-square-tiles. 
- */ - int2 get_split_tile_size(RenderTile rtile, - int2 max_render_feasible_tile_size) + virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, + const DeviceRequestedFeatures& requested_features) { - int2 split_tile_size; - int num_global_threads = max_render_feasible_tile_size.x * - max_render_feasible_tile_size.y; - int d_w = rtile.w; - int d_h = rtile.h; - /* Ceil round off d_w and d_h */ - d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_X; - d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - while(d_w * d_h > num_global_threads) { - /* Halve the longer dimension. */ - if(d_w >= d_h) { - d_w = d_w / 2; - d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_X; - } - else { - d_h = d_h / 2; - d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - } + OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device); + + kernel->program = OpenCLDeviceBase::OpenCLProgram(device, + "split_" + kernel_name, + "kernel_" + kernel_name + ".cl", + get_build_options(device, requested_features)); + kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); + kernel->program.load(); + + if(!kernel->program.is_loaded()) { + delete kernel; + return NULL; } - split_tile_size.x = d_w; - split_tile_size.y = d_h; - return split_tile_size; + + return kernel; } - /* Splits existing tile into multiple tiles of tile size split_tile_size. */ - vector<SplitRenderTile> split_tiles(RenderTile rtile, int2 split_tile_size) + virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { - vector<SplitRenderTile> to_path_trace_rtile; - int d_w = rtile.w; - int d_h = rtile.h; - int num_tiles_x = (((d_w - 1) / split_tile_size.x) + 1); - int num_tiles_y = (((d_h - 1) / split_tile_size.y) + 1); - /* Buffer and rng_state offset calc. 
*/ - size_t offset_index = rtile.offset + (rtile.x + rtile.y * rtile.stride); - size_t offset_x = offset_index % rtile.stride; - size_t offset_y = offset_index / rtile.stride; - /* Resize to_path_trace_rtile. */ - to_path_trace_rtile.resize(num_tiles_x * num_tiles_y); - for(int tile_iter_y = 0; tile_iter_y < num_tiles_y; tile_iter_y++) { - for(int tile_iter_x = 0; tile_iter_x < num_tiles_x; tile_iter_x++) { - int rtile_index = tile_iter_y * num_tiles_x + tile_iter_x; - to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x * split_tile_size.x; - to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y * split_tile_size.y; - to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x * split_tile_size.x; - to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y * split_tile_size.y; - to_path_trace_rtile[rtile_index].start_sample = rtile.start_sample; - to_path_trace_rtile[rtile_index].num_samples = rtile.num_samples; - to_path_trace_rtile[rtile_index].sample = rtile.sample; - to_path_trace_rtile[rtile_index].resolution = rtile.resolution; - to_path_trace_rtile[rtile_index].offset = rtile.offset; - to_path_trace_rtile[rtile_index].buffers = rtile.buffers; - to_path_trace_rtile[rtile_index].buffer = rtile.buffer; - to_path_trace_rtile[rtile_index].rng_state = rtile.rng_state; - to_path_trace_rtile[rtile_index].x = rtile.x + (tile_iter_x * split_tile_size.x); - to_path_trace_rtile[rtile_index].y = rtile.y + (tile_iter_y * split_tile_size.y); - to_path_trace_rtile[rtile_index].buffer_rng_state_stride = rtile.stride; - /* Fill width and height of the new render tile. */ - to_path_trace_rtile[rtile_index].w = (tile_iter_x == (num_tiles_x - 1)) ? - (d_w - (tile_iter_x * split_tile_size.x)) /* Border tile */ - : split_tile_size.x; - to_path_trace_rtile[rtile_index].h = (tile_iter_y == (num_tiles_y - 1)) ? 
- (d_h - (tile_iter_y * split_tile_size.y)) /* Border tile */ - : split_tile_size.y; - to_path_trace_rtile[rtile_index].stride = to_path_trace_rtile[rtile_index].w; - } + device_vector<uint> size_buffer; + size_buffer.resize(1); + device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); + + uint threads = num_threads; + device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); + + size_t global_size = 64; + device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, + device->program_state_buffer_size(), + 1, + NULL, + &global_size, + NULL, + 0, + NULL, + NULL); + + device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); + + device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint)); + device->mem_free(size_buffer); + + if(device->ciErr != CL_SUCCESS) { + string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", + clewErrorString(device->ciErr)); + device->opencl_error(message); + return 0; } - return to_path_trace_rtile; + + return *size_buffer.get_data(); } - void thread_run(DeviceTask *task) + virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, + RenderTile& rtile, + int num_global_elements, + device_memory& kernel_globals, + device_memory& kernel_data, + device_memory& split_data, + device_memory& ray_state, + device_memory& queue_index, + device_memory& use_queues_flag, + device_memory& work_pool_wgs + ) { - if(task->type == DeviceTask::FILM_CONVERT) { - film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); - } - else if(task->type == DeviceTask::SHADER) { - shader(*task); - } - else if(task->type == DeviceTask::PATH_TRACE) { - RenderTile tile; - bool initialize_data_and_check_render_feasibility = false; - bool need_to_split_tiles_further = false; - int2 max_render_feasible_tile_size; - size_t feasible_global_work_size; - const int2 tile_size = task->requested_tile_size; - /* Keep rendering tiles until done. 
*/ - while(task->acquire_tile(this, tile)) { - if(!initialize_data_and_check_render_feasibility) { - /* Initialize data. */ - /* Calculate per_thread_output_buffer_size. */ - size_t output_buffer_size = 0; - ciErr = clGetMemObjectInfo((cl_mem)tile.buffer, - CL_MEM_SIZE, - sizeof(output_buffer_size), - &output_buffer_size, - NULL); - assert(ciErr == CL_SUCCESS && "Can't get tile.buffer mem object info"); - /* This value is different when running on AMD and NV. */ - if(background) { - /* In offline render the number of buffer elements - * associated with tile.buffer is the current tile size. - */ - per_thread_output_buffer_size = - output_buffer_size / (tile.w * tile.h); - } - else { - /* interactive rendering, unlike offline render, the number of buffer elements - * associated with tile.buffer is the entire viewport size. - */ - per_thread_output_buffer_size = - output_buffer_size / (tile.buffers->params.width * - tile.buffers->params.height); - } - /* Check render feasibility. */ - feasible_global_work_size = get_feasible_global_work_size( - tile_size, - CL_MEM_PTR(const_mem_map["__data"]->device_pointer)); - max_render_feasible_tile_size = - get_max_render_feasible_tile_size( - feasible_global_work_size); - need_to_split_tiles_further = - need_to_split_tile(tile_size.x, - tile_size.y, - max_render_feasible_tile_size); - initialize_data_and_check_render_feasibility = true; - } - if(need_to_split_tiles_further) { - int2 split_tile_size = - get_split_tile_size(tile, - max_render_feasible_tile_size); - vector<SplitRenderTile> to_path_trace_render_tiles = - split_tiles(tile, split_tile_size); - /* Print message to console */ - if(background && (to_path_trace_render_tiles.size() > 1)) { - fprintf(stderr, "Message : Tiles need to be split " - "further inside path trace (due to insufficient " - "device-global-memory for split kernel to " - "function) \n" - "The current tile of dimensions %dx%d is split " - "into tiles of dimension %dx%d for render \n", - tile.w, tile.h, - 
split_tile_size.x, - split_tile_size.y); - } - /* Process all split tiles. */ - for(int tile_iter = 0; - tile_iter < to_path_trace_render_tiles.size(); - ++tile_iter) - { - path_trace(task, - to_path_trace_render_tiles[tile_iter], - max_render_feasible_tile_size); - } - } - else { - /* No splitting required; process the entire tile at once. */ - /* Render feasible tile size is user-set-tile-size itself. */ - max_render_feasible_tile_size.x = - (((tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_X; - max_render_feasible_tile_size.y = - (((tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * - SPLIT_KERNEL_LOCAL_SIZE_Y; - /* buffer_rng_state_stride is stride itself. */ - SplitRenderTile split_tile(tile); - split_tile.buffer_rng_state_stride = tile.stride; - path_trace(task, split_tile, max_render_feasible_tile_size); - } - tile.sample = tile.start_sample + tile.num_samples; + cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; - /* Complete kernel execution before release tile. */ - /* This helps in multi-device render; - * The device that reaches the critical-section function - * release_tile waits (stalling other devices from entering - * release_tile) for all kernels to complete. If device1 (a - * slow-render device) reaches release_tile first then it would - * stall device2 (a fast-render device) from proceeding to render - * next tile. - */ - clFinish(cqCommandQueue); + /* Set the range of samples to be processed for every ray in + * path-regeneration logic. + */ + cl_int start_sample = rtile.start_sample; + cl_int end_sample = rtile.start_sample + rtile.num_samples; - task->release_tile(tile); - } + cl_uint start_arg_index = + device->kernel_set_args(device->program_data_init(), + 0, + kernel_globals, + kernel_data, + split_data, + num_global_elements, + ray_state, + rtile.rng_state); + +/* TODO(sergey): Avoid map lookup here. 
*/ +#define KERNEL_TEX(type, ttype, name) \ + device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name); +#include "kernel_textures.h" +#undef KERNEL_TEX + + start_arg_index += + device->kernel_set_args(device->program_data_init(), + start_arg_index, + start_sample, + end_sample, + rtile.x, + rtile.y, + rtile.w, + rtile.h, + rtile.offset, + rtile.stride, + queue_index, + dQueue_size, + use_queues_flag, + work_pool_wgs, + rtile.num_samples, + rtile.buffer); + + /* Enqueue ckPathTraceKernel_data_init kernel. */ + device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, + device->program_data_init(), + 2, + NULL, + dim.global_size, + dim.local_size, + 0, + NULL, + NULL); + + device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); + + if(device->ciErr != CL_SUCCESS) { + string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", + clewErrorString(device->ciErr)); + device->opencl_error(message); + return false; } + + return true; } -protected: - cl_mem mem_alloc(size_t bufsize, cl_mem_flags mem_flag = CL_MEM_READ_WRITE) + virtual int2 split_kernel_local_size() - { - cl_mem ptr; - assert(bufsize != 0); - ptr = clCreateBuffer(cxContext, mem_flag, bufsize, NULL, &ciErr); - opencl_assert_err(ciErr, "clCreateBuffer"); - return ptr; + return make_int2(64, 1); } - /* ** Those guys are for workign around some compiler-specific bugs ** */ - - string build_options_for_base_program( - const DeviceRequestedFeatures& requested_features) + virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/) - { - return requested_features.get_build_options(); + size_t max_buffer_size; + clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_buffer_size, NULL); + VLOG(1) << "Maximum device allocation size: " + << string_human_readable_number(max_buffer_size) << " bytes. 
(" + << string_human_readable_size(max_buffer_size) << ")."; + + size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2); + int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements)); + VLOG(1) << "Global size: " << global_size << "."; + return global_size; } }; +OpenCLDeviceSplitKernel::OpenCLDeviceSplitKernel(DeviceInfo& info, Stats &stats, bool background_) +: OpenCLDeviceBase(info, stats, background_) +{ + split_kernel = new OpenCLSplitKernel(this); + + background = background_; +} + Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, bool background) { return new OpenCLDeviceSplitKernel(info, stats, background); diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index 82e1640e508..d5c19bf5386 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -19,6 +19,7 @@ #include "opencl.h" #include "util_logging.h" +#include "util_md5.h" #include "util_path.h" #include "util_time.h" @@ -309,6 +310,8 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src) string build_options; build_options = device->kernel_build_options(debug_src) + kernel_build_options; + VLOG(1) << "Build options passed to clBuildProgram: '" + << build_options << "'."; cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL); /* show warnings even if build is successful */ @@ -336,12 +339,13 @@ bool OpenCLDeviceBase::OpenCLProgram::build_kernel(const string *debug_src) bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src) { - string source = "#include \"kernels/opencl/" + kernel_file + "\" // " + OpenCLCache::get_kernel_md5() + "\n"; + string source = "#include \"kernels/opencl/" + kernel_file + "\"\n"; /* We compile kernels consisting of many files. unfortunately OpenCL * kernel caches do not seem to recognize changes in included files. 
* so we force recompile on changes by adding the md5 hash of all files. */ source = path_source_replace_includes(source, path_get("kernel")); + source += "\n// " + util_md5_string(source) + "\n"; if(debug_src) { path_write_text(*debug_src, source); @@ -438,7 +442,11 @@ void OpenCLDeviceBase::OpenCLProgram::load() if(!program) { add_log(string("OpenCL program ") + program_name + " not found in cache.", true); - string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + OpenCLCache::get_kernel_md5(); + /* need to create source to get md5 */ + string source = "#include \"kernels/opencl/" + kernel_file + "\"\n"; + source = path_source_replace_includes(source, path_get("kernel")); + + string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source); basename = path_cache_get(path_join("kernels", basename)); string clbin = basename + ".clbin"; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 29e0f44841e..1c740b5c6eb 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -13,8 +13,11 @@ set(INC_SYS set(SRC kernels/cpu/kernel.cpp + kernels/cpu/kernel_split.cpp kernels/opencl/kernel.cl + kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_data_init.cl + kernels/opencl/kernel_path_init.cl kernels/opencl/kernel_queue_enqueue.cl kernels/opencl/kernel_scene_intersect.cl kernels/opencl/kernel_lamp_emission.cl @@ -24,8 +27,8 @@ set(SRC kernels/opencl/kernel_direct_lighting.cl kernels/opencl/kernel_shadow_blocked.cl kernels/opencl/kernel_next_iteration_setup.cl - kernels/opencl/kernel_sum_all_radiance.cl kernels/cuda/kernel.cu + kernels/cuda/kernel_split.cu ) set(SRC_BVH_HEADERS @@ -88,6 +91,10 @@ set(SRC_KERNELS_CPU_HEADERS kernels/cpu/kernel_cpu_image.h ) +set(SRC_KERNELS_CUDA_HEADERS + kernels/cuda/kernel_config.h +) + set(SRC_CLOSURE_HEADERS closure/alloc.h closure/bsdf.h @@ -195,11 +202,14 @@ set(SRC_SPLIT_HEADERS 
split/kernel_holdout_emission_blurring_pathtermination_ao.h split/kernel_lamp_emission.h split/kernel_next_iteration_setup.h + split/kernel_path_init.h + split/kernel_queue_enqueue.h split/kernel_scene_intersect.h split/kernel_shader_eval.h split/kernel_shadow_blocked.h split/kernel_split_common.h - split/kernel_sum_all_radiance.h + split/kernel_split_data.h + split/kernel_split_data_types.h ) # CUDA module @@ -227,8 +237,9 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() # build for each arch - set(cuda_sources kernels/cuda/kernel.cu + set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} ${SRC_BVH_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} @@ -237,15 +248,22 @@ if(WITH_CYCLES_CUDA_BINARIES) ) set(cuda_cubins) - macro(CYCLES_CUDA_KERNEL_ADD arch experimental) - if(${experimental}) - set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__") - set(cuda_cubin kernel_experimental_${arch}.cubin) + macro(CYCLES_CUDA_KERNEL_ADD arch split experimental) + if(${split}) + set(cuda_extra_flags "-D__SPLIT__") + set(cuda_cubin kernel_split) else() set(cuda_extra_flags "") - set(cuda_cubin kernel_${arch}.cubin) + set(cuda_cubin kernel) + endif() + + if(${experimental}) + set(cuda_extra_flags ${cuda_extra_flags} -D__KERNEL_EXPERIMENTAL__) + set(cuda_cubin ${cuda_cubin}_experimental) endif() + set(cuda_cubin ${cuda_cubin}_${arch}.cubin) + if(WITH_CYCLES_DEBUG) set(cuda_debug_flags "-D__KERNEL_DEBUG__") else() @@ -258,13 +276,19 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}") set(cuda_math_flags "--use_fast_math") + if(split) + set(cuda_kernel_src "/kernels/cuda/kernel_split.cu") + else() + set(cuda_kernel_src "/kernels/cuda/kernel.cu") + endif() + add_custom_command( OUTPUT ${cuda_cubin} COMMAND ${cuda_nvcc_command} -arch=${arch} ${CUDA_NVCC_FLAGS} -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu + --cubin 
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} @@ -291,7 +315,12 @@ if(WITH_CYCLES_CUDA_BINARIES) foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) # Compile regular kernel - CYCLES_CUDA_KERNEL_ADD(${arch} FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} FALSE FALSE) + + if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) + # Compile split kernel + CYCLES_CUDA_KERNEL_ADD(${arch} TRUE FALSE) + endif() endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) @@ -314,31 +343,42 @@ if(CXX_HAS_SSE) kernels/cpu/kernel_sse2.cpp kernels/cpu/kernel_sse3.cpp kernels/cpu/kernel_sse41.cpp + kernels/cpu/kernel_split_sse2.cpp + kernels/cpu/kernel_split_sse3.cpp + kernels/cpu/kernel_split_sse41.cpp ) set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX) list(APPEND SRC kernels/cpu/kernel_avx.cpp + kernels/cpu/kernel_split_avx.cpp ) set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX2) list(APPEND SRC kernels/cpu/kernel_avx2.cpp + kernels/cpu/kernel_split_avx2.cpp ) set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS 
"${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") endif() add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} ${SRC_BVH_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} @@ -361,7 +401,9 @@ endif() #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) @@ -371,9 +413,10 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emiss delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) 
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm) diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index b7abc1ec507..4894ea58dba 100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -20,17 +20,17 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty { kernel_assert(size <= sizeof(ShaderClosure)); - int num_closure = ccl_fetch(sd, num_closure); - int num_closure_extra = ccl_fetch(sd, num_closure_extra); + int num_closure = sd->num_closure; + int num_closure_extra = sd->num_closure_extra; if(num_closure + num_closure_extra >= MAX_CLOSURE) return NULL; - ShaderClosure *sc = &ccl_fetch(sd, closure)[num_closure]; + ShaderClosure *sc = &sd->closure[num_closure]; sc->type = type; sc->weight = weight; - ccl_fetch(sd, num_closure)++; + sd->num_closure++; return sc; } @@ -44,18 +44,18 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size) * This lets us keep the same fast array iteration over closures, as we * found linked list iteration and iteration with skipping to be slower. 
*/ int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); - int num_closure = ccl_fetch(sd, num_closure); - int num_closure_extra = ccl_fetch(sd, num_closure_extra) + num_extra; + int num_closure = sd->num_closure; + int num_closure_extra = sd->num_closure_extra + num_extra; if(num_closure + num_closure_extra > MAX_CLOSURE) { /* Remove previous closure. */ - ccl_fetch(sd, num_closure)--; - ccl_fetch(sd, num_closure_extra)++; + sd->num_closure--; + sd->num_closure_extra++; return NULL; } - ccl_fetch(sd, num_closure_extra) = num_closure_extra; - return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - num_closure_extra); + sd->num_closure_extra = num_closure_extra; + return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - num_closure_extra); } ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight) diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 7e4d5fe2e37..a44b9e2d9b9 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -51,89 +51,89 @@ ccl_device_forceinline int bsdf_sample(KernelGlobals *kg, switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: - label = bsdf_diffuse_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #ifdef __SVM__ case CLOSURE_BSDF_OREN_NAYAR_ID: - label = bsdf_oren_nayar_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #ifdef __OSL__ case CLOSURE_BSDF_PHONG_RAMP_ID: - label = bsdf_phong_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, 
ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - label = bsdf_diffuse_ramp_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #endif case CLOSURE_BSDF_TRANSLUCENT_ID: - label = bsdf_translucent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - label = bsdf_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - label = bsdf_refraction_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - label = bsdf_transparent_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - label = bsdf_microfacet_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, 
dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - label = bsdf_microfacet_multi_ggx_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state)); + label = bsdf_microfacet_multi_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &ccl_fetch(sd, lcg_state)); + label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, + eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - label = bsdf_microfacet_beckmann_sample(kg, sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - label = bsdf_ashikhmin_shirley_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - 
label = bsdf_ashikhmin_velvet_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - label = bsdf_diffuse_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - label = bsdf_glossy_toon_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - label = bsdf_hair_reflection_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - label = bsdf_hair_transmission_sample(sc, ccl_fetch(sd, Ng), ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, + label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - label = volume_henyey_greenstein_sample(sc, ccl_fetch(sd, I), ccl_fetch(sd, dI).dx, ccl_fetch(sd, dI).dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); + label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); break; #endif 
default: @@ -157,75 +157,75 @@ float3 bsdf_eval(KernelGlobals *kg, { float3 eval; - if(dot(ccl_fetch(sd, Ng), omega_in) >= 0.0f) { + if(dot(sd->Ng, omega_in) >= 0.0f) { switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: - eval = bsdf_diffuse_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); break; #ifdef __SVM__ case CLOSURE_BSDF_OREN_NAYAR_ID: - eval = bsdf_oren_nayar_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf); break; #ifdef __OSL__ case CLOSURE_BSDF_PHONG_RAMP_ID: - eval = bsdf_phong_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - eval = bsdf_diffuse_ramp_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf); break; #endif case CLOSURE_BSDF_TRANSLUCENT_ID: - eval = bsdf_translucent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - eval = bsdf_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - eval = bsdf_refraction_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - eval = bsdf_transparent_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - eval = bsdf_microfacet_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf); break; case 
CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state)); + eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state)); + eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - eval = bsdf_microfacet_beckmann_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - eval = bsdf_ashikhmin_shirley_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - eval = bsdf_ashikhmin_velvet_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - eval = bsdf_diffuse_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - eval = bsdf_glossy_toon_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - eval = bsdf_hair_reflection_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - eval = bsdf_hair_transmission_eval_reflect(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = 
bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf); break; #endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); break; #endif default: @@ -237,63 +237,63 @@ float3 bsdf_eval(KernelGlobals *kg, switch(sc->type) { case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_BSSRDF_ID: - eval = bsdf_diffuse_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); break; #ifdef __SVM__ case CLOSURE_BSDF_OREN_NAYAR_ID: - eval = bsdf_oren_nayar_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_TRANSLUCENT_ID: - eval = bsdf_translucent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_REFLECTION_ID: - eval = bsdf_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_REFRACTION_ID: - eval = bsdf_refraction_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_TRANSPARENT_ID: - eval = bsdf_transparent_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_GGX_ID: case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - eval = bsdf_microfacet_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state)); + eval = 
bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf, &ccl_fetch(sd, lcg_state)); + eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - eval = bsdf_microfacet_beckmann_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - eval = bsdf_ashikhmin_shirley_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - eval = bsdf_ashikhmin_velvet_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_DIFFUSE_TOON_ID: - eval = bsdf_diffuse_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_GLOSSY_TOON_ID: - eval = bsdf_glossy_toon_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_REFLECTION_ID: - eval = bsdf_hair_reflection_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf); break; case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - eval = bsdf_hair_transmission_eval_transmit(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf); break; #endif #ifdef __VOLUME__ case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = 
volume_henyey_greenstein_eval_phase(sc, ccl_fetch(sd, I), omega_in, pdf); + eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); break; #endif default: diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index 08ccee56335..cc62192ef21 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -30,7 +30,7 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData * ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd) { #ifdef __HAIR__ - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + if(sd->type & PRIMITIVE_ALL_CURVE) { return ATTR_PRIM_CURVE; } else @@ -53,12 +53,12 @@ ccl_device_inline AttributeDescriptor attribute_not_found() ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id) { - if(ccl_fetch(sd, object) == PRIM_NONE) { + if(sd->object == PRIM_NONE) { return attribute_not_found(); } /* for SVM, find attribute by unique id */ - uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; + uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; attr_offset += attribute_primitive_type(kg, sd); uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); @@ -73,7 +73,7 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const Sh AttributeDescriptor desc; desc.element = (AttributeElement)attr_map.y; - if(ccl_fetch(sd, prim) == PRIM_NONE && + if(sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 9de335403ce..7cc840ce78d 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -32,22 +32,22 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const 
ShaderData *sd, if(dy) *dy = 0.0f; #endif - return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); } else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); + if(dx) *dx = sd->du.dx*(f1 - f0); if(dy) *dy = 0.0f; #endif - return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; + return (1.0f - sd->u)*f0 + sd->u*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -71,22 +71,22 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); } else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 
ccl_fetch(sd, du).dx*(f1 - f0); + if(dx) *dx = sd->du.dx*(f1 - f0); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return (1.0f - ccl_fetch(sd, u))*f0 + ccl_fetch(sd, u)*f1; + return (1.0f - sd->u)*f0 + sd->u*f1; } else { #ifdef __RAY_DIFFERENTIALS__ @@ -104,22 +104,22 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) { float r = 0.0f; - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + if(sd->type & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; - if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { + if(sd->type & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); } - r = (P_curve[1].w - P_curve[0].w) * ccl_fetch(sd, u) + P_curve[0].w; + r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; } return r*2.0f; @@ -130,8 +130,8 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; @@ -139,7 +139,7 @@ ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); 
- return float4_to_float3(P_curve[1]) * ccl_fetch(sd, u) + float4_to_float3(P_curve[0]) * (1.0f - ccl_fetch(sd, u)); + return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); } /* Curve tangent normal */ @@ -148,14 +148,14 @@ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) { float3 tgN = make_float3(0.0f,0.0f,0.0f); - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + if(sd->type & PRIMITIVE_ALL_CURVE) { - tgN = -(-ccl_fetch(sd, I) - ccl_fetch(sd, dPdu) * (dot(ccl_fetch(sd, dPdu),-ccl_fetch(sd, I)) / len_squared(ccl_fetch(sd, dPdu)))); + tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); tgN = normalize(tgN); /* need to find suitable scaled gd for corrected normal */ #if 0 - tgN = normalize(tgN - gd * ccl_fetch(sd, dPdu)); + tgN = normalize(tgN - gd * sd->dPdu); #endif } @@ -229,6 +229,15 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax) #endif { + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if(time < prim_time.x || time > prim_time.y) { + return false; + } + } + int segment = PRIMITIVE_UNPACK_SEGMENT(type); float epsilon = 0.0f; float r_st, r_en; @@ -257,7 +266,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte #ifdef __KERNEL_AVX2__ avxf P_curve_0_1, P_curve_2_3; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); } @@ -268,7 +277,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte #else /* __KERNEL_AVX2__ */ 
ssef P_curve[4]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = load4f(&kg->__curve_keys.data[ka].x); P_curve[1] = load4f(&kg->__curve_keys.data[k0].x); P_curve[2] = load4f(&kg->__curve_keys.data[k1].x); @@ -363,7 +372,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte float4 P_curve[4]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = kernel_tex_fetch(__curve_keys, ka); P_curve[1] = kernel_tex_fetch(__curve_keys, k0); P_curve[2] = kernel_tex_fetch(__curve_keys, k1); @@ -689,6 +698,15 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection # define dot3(x, y) dot(x, y) #endif + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if(time < prim_time.x || time > prim_time.y) { + return false; + } + } + int segment = PRIMITIVE_UNPACK_SEGMENT(type); /* curve Intersection check */ int flags = kernel_data.curve.curveflags; @@ -703,7 +721,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection #ifndef __KERNEL_SSE2__ float4 P_curve[2]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = kernel_tex_fetch(__curve_keys, k0); P_curve[1] = kernel_tex_fetch(__curve_keys, k1); } @@ -738,7 +756,7 @@ ccl_device_forceinline bool bvh_curve_intersect(KernelGlobals *kg, Intersection #else ssef P_curve[2]; - if(type & PRIMITIVE_CURVE) { + if(is_curve_primitive) { P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); } @@ -948,7 +966,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_itfm); + Transform tfm = sd->ob_itfm; #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ 
-961,7 +979,7 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con int prim = kernel_tex_fetch(__prim_index, isect->prim); float4 v00 = kernel_tex_fetch(__curves, prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 tg; @@ -972,14 +990,14 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float4 P_curve[4]; - if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { + if(sd->type & PRIMITIVE_CURVE) { P_curve[0] = kernel_tex_fetch(__curve_keys, ka); P_curve[1] = kernel_tex_fetch(__curve_keys, k0); P_curve[2] = kernel_tex_fetch(__curve_keys, k1); P_curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - motion_cardinal_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), ka, k0, k1, kb, P_curve); + motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); } float3 p[4]; @@ -991,43 +1009,43 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con P = P + D*t; #ifdef __UV__ - ccl_fetch(sd, u) = isect->u; - ccl_fetch(sd, v) = 0.0f; + sd->u = isect->u; + sd->v = 0.0f; #endif tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - ccl_fetch(sd, Ng) = normalize(-(D - tg * (dot(tg, D)))); + sd->Ng = normalize(-(D - tg * (dot(tg, D)))); } else { /* direction from inside to surface of curve */ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - ccl_fetch(sd, Ng) = normalize(P - p_curr); + sd->Ng = normalize(P - p_curr); /* adjustment for changing radius */ float gd = isect->v; if(gd != 0.0f) { - ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; - ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); } } /* todo: sometimes the normal is still so that this is detected as * backfacing 
even if cull backfaces is enabled */ - ccl_fetch(sd, N) = ccl_fetch(sd, Ng); + sd->N = sd->Ng; } else { float4 P_curve[2]; - if(ccl_fetch(sd, type) & PRIMITIVE_CURVE) { + if(sd->type & PRIMITIVE_CURVE) { P_curve[0]= kernel_tex_fetch(__curve_keys, k0); P_curve[1]= kernel_tex_fetch(__curve_keys, k1); } else { - motion_curve_keys(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), k0, k1, P_curve); + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); } float l = 1.0f; @@ -1038,39 +1056,39 @@ ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, con float3 dif = P - float4_to_float3(P_curve[0]); #ifdef __UV__ - ccl_fetch(sd, u) = dot(dif,tg)/l; - ccl_fetch(sd, v) = 0.0f; + sd->u = dot(dif,tg)/l; + sd->v = 0.0f; #endif if(flag & CURVE_KN_TRUETANGENTGNORMAL) { - ccl_fetch(sd, Ng) = -(D - tg * dot(tg, D)); - ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); + sd->Ng = -(D - tg * dot(tg, D)); + sd->Ng = normalize(sd->Ng); } else { float gd = isect->v; /* direction from inside to surface of curve */ - ccl_fetch(sd, Ng) = (dif - tg * ccl_fetch(sd, u) * l) / (P_curve[0].w + ccl_fetch(sd, u) * l * gd); + sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd); /* adjustment for changing radius */ if(gd != 0.0f) { - ccl_fetch(sd, Ng) = ccl_fetch(sd, Ng) - gd * tg; - ccl_fetch(sd, Ng) = normalize(ccl_fetch(sd, Ng)); + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); } } - ccl_fetch(sd, N) = ccl_fetch(sd, Ng); + sd->N = sd->Ng; } #ifdef __DPDU__ /* dPdu/dPdv */ - ccl_fetch(sd, dPdu) = tg; - ccl_fetch(sd, dPdv) = cross(tg, ccl_fetch(sd, Ng)); + sd->dPdu = tg; + sd->dPdv = cross(tg, sd->Ng); #endif if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_tfm); + Transform tfm = sd->ob_tfm; #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h 
b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index d57d74ea882..2500228281e 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -48,7 +48,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, return P; } # ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_itfm); + Transform tfm = sd->ob_itfm; # else Transform tfm = object_fetch_transform(kg, isect->object, @@ -77,7 +77,7 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_tfm); + Transform tfm = sd->ob_tfm; # else Transform tfm = object_fetch_transform(kg, isect->object, @@ -116,7 +116,7 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg, # ifdef __INTERSECTION_REFINE__ if(isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_itfm); + Transform tfm = sd->ob_itfm; # else Transform tfm = object_fetch_transform(kg, isect->object, @@ -144,7 +144,7 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_tfm); + Transform tfm = sd->ob_tfm; # else Transform tfm = object_fetch_transform(kg, isect->object, diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h index 0e024a05db6..cb456056e20 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h @@ -39,26 +39,26 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, bool subsurface) { /* Get shader. */ - ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim)); + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); /* Get motion info. 
*/ /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), * can we de-duplicate something here? */ int numsteps, numverts; - object_motion_info(kg, ccl_fetch(sd, object), &numsteps, &numverts, NULL); + object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); /* Figure out which steps we need to fetch and their interpolation factor. */ int maxstep = numsteps*2; - int step = min((int)(ccl_fetch(sd, time)*maxstep), maxstep-1); - float t = ccl_fetch(sd, time)*maxstep - step; + int step = min((int)(sd->time*maxstep), maxstep-1); + float t = sd->time*maxstep - step; /* Find attribute. */ AttributeElement elem; - int offset = find_attribute_motion(kg, ccl_fetch(sd, object), + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); kernel_assert(offset != ATTR_STD_NOT_FOUND); /* Fetch vertex coordinates. */ float3 verts[3], next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); /* Interpolate between steps. */ @@ -68,7 +68,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, /* Compute refined position. */ #ifdef __SUBSURFACE__ if(subsurface) { - ccl_fetch(sd, P) = motion_triangle_refine_subsurface(kg, + sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, @@ -77,29 +77,29 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, else #endif /* __SUBSURFACE__*/ { - ccl_fetch(sd, P) = motion_triangle_refine(kg, sd, isect, ray, verts); + sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); } /* Compute face normal. 
*/ float3 Ng; - if(ccl_fetch(sd, object_flag) & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); } else { Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); } - ccl_fetch(sd, Ng) = Ng; - ccl_fetch(sd, N) = Ng; + sd->Ng = Ng; + sd->N = Ng; /* Compute derivatives of P w.r.t. uv. */ #ifdef __DPDU__ - ccl_fetch(sd, dPdu) = (verts[0] - verts[2]); - ccl_fetch(sd, dPdv) = (verts[1] - verts[2]); + sd->dPdu = (verts[0] - verts[2]); + sd->dPdv = (verts[1] - verts[2]); #endif /* Compute smooth normal. */ - if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { + if(sd->shader & SHADER_SMOOTH_NORMAL) { /* Find attribute. */ AttributeElement elem; int offset = find_attribute_motion(kg, - ccl_fetch(sd, object), + sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); kernel_assert(offset != ATTR_STD_NOT_FOUND); @@ -112,10 +112,10 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, normals[1] = (1.0f - t)*normals[1] + t*next_normals[1]; normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; /* Interpolate between vertices. 
*/ - float u = ccl_fetch(sd, u); - float v = ccl_fetch(sd, v); + float u = sd->u; + float v = sd->v; float w = 1.0f - u - v; - ccl_fetch(sd, N) = (u*normals[0] + v*normals[1] + w*normals[2]); + sd->N = (u*normals[0] + v*normals[1] + w*normals[2]); } } diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index f51b2d18657..5a04be8b0bf 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -137,9 +137,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point_auto(&ccl_fetch(sd, ob_tfm), *P); + *P = transform_point_auto(&sd->ob_tfm, *P); #else - Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -149,9 +149,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, const Shader ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point_auto(&ccl_fetch(sd, ob_itfm), *P); + *P = transform_point_auto(&sd->ob_itfm, *P); #else - Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); *P = transform_point(&tfm, *P); #endif } @@ -161,12 +161,12 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, cons ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - if((ccl_fetch(sd, object) != OBJECT_NONE) || (ccl_fetch(sd, type) == PRIMITIVE_LAMP)) { - *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_tfm), *N)); + 
if((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { + *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N)); } #else - if(ccl_fetch(sd, object) != OBJECT_NONE) { - Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); + if(sd->object != OBJECT_NONE) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); } #endif @@ -177,9 +177,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed_auto(&ccl_fetch(sd, ob_itfm), *N)); + *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N)); #else - Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); *N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -189,9 +189,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction_auto(&ccl_fetch(sd, ob_tfm), *D); + *D = transform_direction_auto(&sd->ob_tfm, *D); #else - Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -201,9 +201,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction_auto(&ccl_fetch(sd, ob_itfm), *D); + *D = transform_direction_auto(&sd->ob_itfm, *D); #else - 
Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); *D = transform_direction(&tfm, *D); #endif } @@ -212,13 +212,13 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd) { - if(ccl_fetch(sd, object) == OBJECT_NONE) + if(sd->object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); #ifdef __OBJECT_MOTION__ - return make_float3(ccl_fetch(sd, ob_tfm).x.w, ccl_fetch(sd, ob_tfm).y.w, ccl_fetch(sd, ob_tfm).z.w); + return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); #else - Transform tfm = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); #endif } @@ -326,7 +326,7 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE + 1); + return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 1); } /* Particle data from which object was instanced */ diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h index 6a0ff5a4a04..5663b598508 100644 --- a/intern/cycles/kernel/geom/geom_patch.h +++ b/intern/cycles/kernel/geom/geom_patch.h @@ -267,7 +267,7 @@ ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int o float weights_du[PATCH_MAX_CONTROL_VERTS]; float weights_dv[PATCH_MAX_CONTROL_VERTS]; - int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); float val = 0.0f; 
@@ -294,7 +294,7 @@ ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int float weights_du[PATCH_MAX_CONTROL_VERTS]; float weights_dv[PATCH_MAX_CONTROL_VERTS]; - int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); float3 val = make_float3(0.0f, 0.0f, 0.0f); @@ -321,7 +321,7 @@ ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int float weights_du[PATCH_MAX_CONTROL_VERTS]; float weights_dv[PATCH_MAX_CONTROL_VERTS]; - int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); float3 val = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 8a73bb2f78b..90a9c2147cc 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -28,19 +28,19 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, const AttributeDescriptor desc, float *dx, float *dy) { - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { + if(sd->type & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) return triangle_attribute_float(kg, sd, desc, dx, dy); else return subd_triangle_attribute_float(kg, sd, desc, dx, dy); } #ifdef __HAIR__ - else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + else if(sd->type & PRIMITIVE_ALL_CURVE) { return curve_attribute_float(kg, sd, desc, dx, dy); } #endif #ifdef __VOLUME__ - else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { return volume_attribute_float(kg, sd, desc, dx, dy); } #endif @@ -56,19 +56,19 @@ ccl_device_inline 
float3 primitive_attribute_float3(KernelGlobals *kg, const AttributeDescriptor desc, float3 *dx, float3 *dy) { - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { + if(sd->type & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) return triangle_attribute_float3(kg, sd, desc, dx, dy); else return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); } #ifdef __HAIR__ - else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + else if(sd->type & PRIMITIVE_ALL_CURVE) { return curve_attribute_float3(kg, sd, desc, dx, dy); } #endif #ifdef __VOLUME__ - else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { return volume_attribute_float3(kg, sd, desc, dx, dy); } #endif @@ -118,9 +118,9 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) + if(sd->type & PRIMITIVE_ALL_CURVE) # ifdef __DPDU__ - return normalize(ccl_fetch(sd, dPdu)); + return normalize(sd->dPdu); # else return make_float3(0.0f, 0.0f, 0.0f); # endif @@ -133,12 +133,12 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) float3 data = primitive_attribute_float3(kg, sd, desc, NULL, NULL); data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); - return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N)))); + return cross(sd->N, normalize(cross(data, sd->N))); } else { /* otherwise use surface derivatives */ #ifdef __DPDU__ - return normalize(ccl_fetch(sd, dPdu)); + return normalize(sd->dPdu); #else return make_float3(0.0f, 0.0f, 0.0f); #endif @@ -153,17 +153,17 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData * float3 center; #ifdef __HAIR__ - bool is_curve_primitive = ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE; + bool 
is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; if(is_curve_primitive) { center = curve_motion_center_location(kg, sd); - if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) { + if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { object_position_transform(kg, sd, ¢er); } } else #endif - center = ccl_fetch(sd, P); + center = sd->P; float3 motion_pre = center, motion_post = center; @@ -173,16 +173,16 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData * if(desc.offset != ATTR_STD_NOT_FOUND) { /* get motion info */ int numverts, numkeys; - object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys); + object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); /* lookup attributes */ motion_pre = primitive_attribute_float3(kg, sd, desc, NULL, NULL); - desc.offset += (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; + desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL); #ifdef __HAIR__ - if(is_curve_primitive && (ccl_fetch(sd, object_flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { object_position_transform(kg, sd, &motion_pre); object_position_transform(kg, sd, &motion_post); } @@ -193,10 +193,10 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData * * transformation was set match the world/object space of motion_pre/post */ Transform tfm; - tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_PRE); + tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE); motion_pre = transform_point(&tfm, motion_pre); - tfm = object_fetch_vector_transform(kg, ccl_fetch(sd, object), OBJECT_VECTOR_MOTION_POST); + tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST); motion_post = transform_point(&tfm, motion_post); float3 
motion_center; diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index 647840dc696..044e82f03d4 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -22,14 +22,14 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd) { - return (ccl_fetch(sd, prim) != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, ccl_fetch(sd, prim)) : ~0; + return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0; } /* UV coords of triangle within patch */ ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, const ShaderData *sd, float2 uv[3]) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); @@ -110,7 +110,7 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const float2 dpdv = uv[1] - uv[2]; /* p is [s, t] */ - float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2]; + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; float a, dads, dadt; a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); @@ -123,8 +123,8 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const float dtdv = dpdv.y; if(dx) { - float dudx = ccl_fetch(sd, du).dx; - float dvdx = ccl_fetch(sd, dv).dx; + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; float dsdx = dsdu*dudx + dsdv*dvdx; float dtdx = dtdu*dudx + dtdv*dvdx; @@ -132,8 +132,8 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const *dx = dads*dsdx + dadt*dtdx; } if(dy) { - float dudy = ccl_fetch(sd, du).dy; - float dvdy = ccl_fetch(sd, dv).dy; + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; float dsdy = dsdu*dudy + dsdv*dvdy; float dtdy = dtdu*dudy + 
dtdv*dvdy; @@ -174,11 +174,11 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; - if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; + if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; + if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; #endif - return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; } else if(desc.element == ATTR_ELEMENT_CORNER) { float2 uv[3]; @@ -202,11 +202,11 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; - if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; + if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; + if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; #endif - return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; } else { if(dx) *dx = 0.0f; @@ -229,7 +229,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, con float2 dpdv = uv[1] - uv[2]; /* p is [s, t] */ - float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2]; + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; float3 a, dads, dadt; @@ -248,8 +248,8 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, con float dtdv = dpdv.y; if(dx) { - float dudx = 
ccl_fetch(sd, du).dx; - float dvdx = ccl_fetch(sd, dv).dx; + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; float dsdx = dsdu*dudx + dsdv*dvdx; float dtdx = dtdu*dudx + dtdv*dvdx; @@ -257,8 +257,8 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, con *dx = dads*dsdx + dadt*dtdx; } if(dy) { - float dudy = ccl_fetch(sd, du).dy; - float dvdy = ccl_fetch(sd, dv).dy; + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; float dsdy = dsdu*dudy + dsdv*dvdy; float dtdy = dtdu*dudy + dtdv*dvdy; @@ -299,11 +299,11 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, con float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; - if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; + if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; + if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; #endif - return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; } else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { float2 uv[3]; @@ -337,11 +337,11 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, con float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; - if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c; + if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; + if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; #endif - return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - 
ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; + return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; } else { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 3229091bbb0..47778553b94 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -26,13 +26,13 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); /* return normal */ - if(ccl_fetch(sd, object_flag) & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { return normalize(cross(v2 - v0, v1 - v0)); } else { @@ -110,34 +110,34 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); } else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - 
(ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; - if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; #endif - return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; } else if(desc.element == ATTR_ELEMENT_CORNER) { - int tri = desc.offset + ccl_fetch(sd, prim)*3; + int tri = desc.offset + sd->prim*3; float f0 = kernel_tex_fetch(__attributes_float, tri + 0); float f1 = kernel_tex_fetch(__attributes_float, tri + 1); float f2 = kernel_tex_fetch(__attributes_float, tri + 2); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; - if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; #endif - return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; } else { if(dx) *dx = 0.0f; @@ -153,24 +153,24 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); } else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, 
sd->prim); float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; - if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; #endif - return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; } else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { - int tri = desc.offset + ccl_fetch(sd, prim)*3; + int tri = desc.offset + sd->prim*3; float3 f0, f1, f2; if(desc.element == ATTR_ELEMENT_CORNER) { @@ -185,11 +185,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; - if(dy) *dy = ccl_fetch(sd, du).dy*f0 + ccl_fetch(sd, dv).dy*f1 - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*f2; + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; #endif - return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; } else { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 
4db121d94f4..4d234dd62bd 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -457,7 +457,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, return P; } # ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_itfm); + Transform tfm = sd->ob_itfm; # else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif @@ -491,7 +491,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_tfm); + Transform tfm = sd->ob_tfm; # else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif @@ -519,7 +519,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_itfm); + Transform tfm = sd->ob_itfm; #else Transform tfm = object_fetch_transform(kg, isect->object, @@ -557,7 +557,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = ccl_fetch(sd, ob_tfm); + Transform tfm = sd->ob_tfm; #else Transform tfm = object_fetch_transform(kg, isect->object, diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 03724c955be..1e0ef5201c9 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -64,7 +64,7 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - float3 P = volume_normalized_position(kg, sd, ccl_fetch(sd, P)); + float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_CUDA__ # if __CUDA_ARCH__ >= 300 CUtexObject tex = kernel_tex_fetch(__bindless_mapping, 
desc.offset); @@ -91,7 +91,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { - float3 P = volume_normalized_position(kg, sd, ccl_fetch(sd, P)); + float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_CUDA__ # if __CUDA_ARCH__ >= 300 CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 9279a94c13a..cd339e6237e 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -20,6 +20,7 @@ /* CPU Kernel Interface */ #include "util_types.h" +#include "kernel_types.h" CCL_NAMESPACE_BEGIN @@ -28,6 +29,7 @@ CCL_NAMESPACE_BEGIN #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) struct KernelGlobals; +struct KernelData; KernelGlobals *kernel_globals_create(); void kernel_globals_free(KernelGlobals *kg); diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 5bcc57cdcdf..f18d145f7cf 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -54,7 +54,8 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg, float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF); shader_eval_surface(kg, sd, &rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN); - /* TODO, disable the closures we won't need */ + /* TODO, disable more closures we don't need besides transparent */ + shader_bsdf_disable_transparency(kg, sd); #ifdef __BRANCHED_PATH__ if(!kernel_data.integrator.branched) { diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index dedac6b1465..0df5217d97a 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -457,7 +457,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, 
ShaderData *sd, { if(kernel_data.cam.type != CAMERA_PANORAMA) { /* perspective / ortho */ - if(ccl_fetch(sd, object) == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) + if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) P += camera_position(kg); Transform tfm = kernel_data.cam.worldtondc; @@ -467,7 +467,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, /* panorama */ Transform tfm = kernel_data.cam.worldtocamera; - if(ccl_fetch(sd, object) != OBJECT_NONE) + if(sd->object != OBJECT_NONE) P = normalize(transform_point(&tfm, P)); else P = normalize(transform_direction(&tfm, P)); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 9d1f3bdc918..e347a1eca18 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -44,6 +44,15 @@ #define ccl_addr_space +#define ccl_local_id(d) 0 +#define ccl_global_id(d) (kg->global_id[d]) + +#define ccl_local_size(d) 1 +#define ccl_global_size(d) (kg->global_size[d]) + +#define ccl_group_id(d) ccl_global_id(d) +#define ccl_num_groups(d) ccl_global_size(d) + /* On x86_64, versions of glibc < 2.16 have an issue where expf is * much slower than the double version. This was fixed in glibc 2.16. 
*/ diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index e0c7b17c6a0..37a9e8d2f84 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -46,6 +46,9 @@ #define ccl_device_noinline __device__ __noinline__ #define ccl_global #define ccl_constant +#define ccl_local __shared__ +#define ccl_local_param +#define ccl_private #define ccl_may_alias #define ccl_addr_space #define ccl_restrict __restrict__ @@ -60,6 +63,52 @@ #include "util_half.h" #include "util_types.h" +/* Work item functions */ + +ccl_device_inline uint ccl_local_id(uint d) +{ + switch(d) { + case 0: return threadIdx.x; + case 1: return threadIdx.y; + case 2: return threadIdx.z; + default: return 0; + } +} + +#define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d)) + +ccl_device_inline uint ccl_local_size(uint d) +{ + switch(d) { + case 0: return blockDim.x; + case 1: return blockDim.y; + case 2: return blockDim.z; + default: return 0; + } +} + +#define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d)) + +ccl_device_inline uint ccl_group_id(uint d) +{ + switch(d) { + case 0: return blockIdx.x; + case 1: return blockIdx.y; + case 2: return blockIdx.z; + default: return 0; + } +} + +ccl_device_inline uint ccl_num_groups(uint d) +{ + switch(d) { + case 0: return gridDim.x; + case 1: return gridDim.y; + case 2: return gridDim.z; + default: return 0; + } +} + /* Textures */ typedef texture<float4, 1> texture_float4; diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index f076e3a7d37..6c963dea4f5 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -39,6 +39,7 @@ #define ccl_constant __constant #define ccl_global __global #define ccl_local __local +#define ccl_local_param __local #define ccl_private __private #define ccl_restrict restrict #define ccl_align(n) 
__attribute__((aligned(n))) @@ -49,6 +50,15 @@ # define ccl_addr_space #endif +#define ccl_local_id(d) get_local_id(d) +#define ccl_global_id(d) get_global_id(d) + +#define ccl_local_size(d) get_local_size(d) +#define ccl_global_size(d) get_global_size(d) + +#define ccl_group_id(d) get_group_id(d) +#define ccl_num_groups(d) get_num_groups(d) + /* Selective nodes compilation. */ #ifndef __NODES_MAX_GROUP__ # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 8c7c651a053..bc2d9604122 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -67,7 +67,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, false, ls->lamp); - ls->Ng = ccl_fetch(emission_sd, Ng); + ls->Ng = emission_sd->Ng; /* no path flag, we're evaluating this for all closures. that's weak but * we'd have to do multiple evaluations otherwise */ @@ -76,7 +76,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, path_state_modify_bounce(state, false); /* evaluate emissive closure */ - if(ccl_fetch(emission_sd, flag) & SD_EMISSION) + if(emission_sd->flag & SD_EMISSION) eval = shader_emissive_eval(kg, emission_sd); else eval = make_float3(0.0f, 0.0f, 0.0f); @@ -112,7 +112,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, -ls->D, dD, ls->t, - ccl_fetch(sd, time)); + sd->time); if(is_zero(light_eval)) return false; @@ -120,7 +120,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, /* evaluate BSDF at shading point */ #ifdef __VOLUME__ - if(ccl_fetch(sd, prim) != PRIM_NONE) + if(sd->prim != PRIM_NONE) shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS); else { float bsdf_pdf; @@ -168,8 +168,8 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, if(ls->shader & SHADER_CAST_SHADOW) { /* setup ray */ - bool transmit = 
(dot(ccl_fetch(sd, Ng), ls->D) < 0.0f); - ray->P = ray_offset(ccl_fetch(sd, P), (transmit)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng)); + bool transmit = (dot(sd->Ng, ls->D) < 0.0f); + ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng); if(ls->t == FLT_MAX) { /* distant light */ @@ -182,7 +182,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ray->D = normalize_len(ray->D, &ray->t); } - ray->dP = ccl_fetch(sd, dP); + ray->dP = sd->dP; ray->dD = differential3_zero(); } else { @@ -204,14 +204,14 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader float3 L = shader_emissive_eval(kg, sd); #ifdef __HAIR__ - if(!(path_flag & PATH_RAY_MIS_SKIP) && (ccl_fetch(sd, flag) & SD_USE_MIS) && (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)) + if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE)) #else - if(!(path_flag & PATH_RAY_MIS_SKIP) && (ccl_fetch(sd, flag) & SD_USE_MIS)) + if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) #endif { /* multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf */ - float pdf = triangle_light_pdf(kg, ccl_fetch(sd, Ng), ccl_fetch(sd, I), t); + float pdf = triangle_light_pdf(kg, sd->Ng, sd->I, t); float mis_weight = power_heuristic(bsdf_pdf, pdf); return L*mis_weight; diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 2b52a2d2f48..1c3884890bf 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -16,6 +16,9 @@ /* Constant Globals */ +#ifndef __KERNEL_GLOBALS_H__ +#define __KERNEL_GLOBALS_H__ + CCL_NAMESPACE_BEGIN /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in @@ -64,6 +67,13 @@ typedef struct KernelGlobals { /* Storage for decoupled volume steps. 
*/ VolumeStep *decoupled_volume_steps[2]; int decoupled_volume_steps_index; + + /* split kernel */ + SplitData split_data; + SplitParams split_param_data; + + int2 global_size; + int2 global_id; } KernelGlobals; #endif /* __KERNEL_CPU__ */ @@ -103,8 +113,8 @@ typedef ccl_addr_space struct KernelGlobals { # include "kernel_textures.h" # ifdef __SPLIT_KERNEL__ - ShaderData *sd_input; - Intersection *isect_shadow; + SplitData split_data; + SplitParams split_param_data; # endif } KernelGlobals; @@ -146,3 +156,4 @@ ccl_device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, int o CCL_NAMESPACE_END +#endif /* __KERNEL_GLOBALS_H__ */ diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 7aec47e4957..ed523696571 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -19,16 +19,16 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value) { ccl_global float *buf = buffer; -#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) +#if defined(__SPLIT_KERNEL__) atomic_add_and_fetch_float(buf, value); #else *buf = (sample == 0)? value: *buf + value; -#endif // __SPLIT_KERNEL__ && __WORK_STEALING__ +#endif /* __SPLIT_KERNEL__ */ } ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value) { -#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) +#if defined(__SPLIT_KERNEL__) ccl_global float *buf_x = buffer + 0; ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; @@ -39,12 +39,12 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sa #else ccl_global float3 *buf = (ccl_global float3*)buffer; *buf = (sample == 0)? 
value: *buf + value; -#endif // __SPLIT_KERNEL__ && __WORK_STEALING__ +#endif /* __SPLIT_KERNEL__ */ } ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value) { -#if defined(__SPLIT_KERNEL__) && defined(__WORK_STEALING__) +#if defined(__SPLIT_KERNEL__) ccl_global float *buf_x = buffer + 0; ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; @@ -57,7 +57,7 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sa #else ccl_global float4 *buf = (ccl_global float4*)buffer; *buf = (sample == 0)? value: *buf + value; -#endif // __SPLIT_KERNEL__ && __WORK_STEALING__ +#endif /* __SPLIT_KERNEL__ */ } ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, @@ -75,18 +75,18 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl return; if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { - if(!(ccl_fetch(sd, flag) & SD_TRANSPARENT) || + if(!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { if(sample == 0) { if(flag & PASS_DEPTH) { - float depth = camera_distance(kg, ccl_fetch(sd, P)); + float depth = camera_distance(kg, sd->P); kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth); } if(flag & PASS_OBJECT_ID) { - float id = object_pass_id(kg, ccl_fetch(sd, object)); + float id = object_pass_id(kg, sd->object); kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id); } if(flag & PASS_MATERIAL_ID) { @@ -96,7 +96,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl } if(flag & PASS_NORMAL) { - float3 normal = ccl_fetch(sd, N); + float3 normal = sd->N; kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal); } if(flag & PASS_UV) { @@ -127,7 +127,7 @@ ccl_device_inline void 
kernel_write_data_passes(KernelGlobals *kg, ccl_global fl float mist_start = kernel_data.film.mist_start; float mist_inv_depth = kernel_data.film.mist_inv_depth; - float depth = camera_distance(kg, ccl_fetch(sd, P)); + float depth = camera_distance(kg, sd->P); float mist = saturate((depth - mist_start)*mist_inv_depth); /* falloff */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index f90701a8260..95c27850513 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -75,17 +75,17 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) { + if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { Ray light_ray; float3 ao_shadow; - light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng)); + light_ray.P = ray_offset(sd->P, sd->Ng); light_ray.D = ao_D; light_ray.t = kernel_data.background.ao_distance; #ifdef __OBJECT_MOTION__ - light_ray.time = ccl_fetch(sd, time); + light_ray.time = sd->time; #endif /* __OBJECT_MOTION__ */ - light_ray.dP = ccl_fetch(sd, dP); + light_ray.dP = sd->dP; light_ray.dD = differential3_zero(); if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) { @@ -459,7 +459,7 @@ bool kernel_path_subsurface_scatter( # ifdef __VOLUME__ ss_indirect->need_update_volume_stack = kernel_data.integrator.use_volumes && - ccl_fetch(sd, object_flag) & SD_OBJECT_INTERSECTS_VOLUME; + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; # endif /* __VOLUME__ */ /* compute lighting with the BSDF closure */ diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index ff2b828795d..d58960cae4e 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -42,17 +42,17 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, sample_cos_hemisphere(ao_N, bsdf_u, 
bsdf_v, &ao_D, &ao_pdf); - if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) { + if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { Ray light_ray; float3 ao_shadow; - light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng)); + light_ray.P = ray_offset(sd->P, sd->Ng); light_ray.D = ao_D; light_ray.t = kernel_data.background.ao_distance; #ifdef __OBJECT_MOTION__ - light_ray.time = ccl_fetch(sd, time); + light_ray.time = sd->time; #endif /* __OBJECT_MOTION__ */ - light_ray.dP = ccl_fetch(sd, dP); + light_ray.dP = sd->dP; light_ray.dD = differential3_zero(); if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) @@ -67,8 +67,8 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba RNG *rng, ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd, float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L) { - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - const ShaderClosure *sc = &ccl_fetch(sd, closure)[i]; + for(int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; if(!CLOSURE_IS_BSDF(sc->type)) continue; @@ -140,8 +140,8 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, Ray *ray, float3 throughput) { - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = &ccl_fetch(sd, closure)[i]; + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(!CLOSURE_IS_BSSRDF(sc->type)) continue; @@ -169,7 +169,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, Ray volume_ray = *ray; bool need_update_volume_stack = kernel_data.integrator.use_volumes && - ccl_fetch(sd, object_flag) & SD_OBJECT_INTERSECTS_VOLUME; + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; #endif /* __VOLUME__ */ /* compute lighting with the BSDF closure */ diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index fea503d06e5..34a78552c1d 
100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -25,7 +25,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal { #ifdef __EMISSION__ /* sample illumination from lights to find path contribution */ - if(!(ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)) + if(!(sd->flag & SD_BSDF_HAS_EVAL)) return; Ray light_ray; @@ -33,7 +33,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal bool is_lamp; # ifdef __OBJECT_MOTION__ - light_ray.time = ccl_fetch(sd, time); + light_ray.time = sd->time; # endif if(sample_all_lights) { @@ -52,7 +52,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal float terminate = path_branched_rng_light_termination(kg, &lamp_rng, state, j, num_samples); LightSample ls; - if(lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls)) { + if(lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled. * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */ if(kernel_data.integrator.pdf_triangles != 0.0f) @@ -87,7 +87,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal light_t = 0.5f*light_t; LightSample ls; - if(light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls)) { + if(light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. 
*/ if(kernel_data.integrator.num_all_lights) ls.pdf *= 2.0f; @@ -113,7 +113,7 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal float terminate = path_state_rng_light_termination(kg, rng, state); LightSample ls; - if(light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls)) { + if(light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { /* sample random light */ if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { /* trace shadow ray */ @@ -156,15 +156,15 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng, path_state_next(kg, state, label); /* setup ray */ - ray->P = ray_offset(ccl_fetch(sd, P), (label & LABEL_TRANSMIT)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng)); + ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng); ray->D = normalize(bsdf_omega_in); ray->t = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - ray->dP = ccl_fetch(sd, dP); + ray->dP = sd->dP; ray->dD = bsdf_domega_in; #endif #ifdef __OBJECT_MOTION__ - ray->time = ccl_fetch(sd, time); + ray->time = sd->time; #endif #ifdef __VOLUME__ @@ -195,7 +195,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_ PathRadiance *L) { #ifdef __EMISSION__ - if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) + if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) return; /* sample illumination from lights to find path contribution */ @@ -208,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_ bool is_lamp; #ifdef __OBJECT_MOTION__ - light_ray.time = ccl_fetch(sd, time); + light_ray.time = sd->time; #endif LightSample ls; - if(light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls)) { + if(light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, 
state->bounce, &ls)) { float terminate = path_state_rng_light_termination(kg, rng, state); if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { /* trace shadow ray */ @@ -238,7 +238,7 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space Ray *ray) { /* no BSDF? we can stop here */ - if(ccl_fetch(sd, flag) & SD_BSDF) { + if(sd->flag & SD_BSDF) { /* sample BSDF */ float bsdf_pdf; BsdfEval bsdf_eval; @@ -270,16 +270,16 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, path_state_next(kg, state, label); /* setup ray */ - ray->P = ray_offset(ccl_fetch(sd, P), (label & LABEL_TRANSMIT)? -ccl_fetch(sd, Ng): ccl_fetch(sd, Ng)); + ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng); ray->D = normalize(bsdf_omega_in); if(state->bounce == 0) - ray->t -= ccl_fetch(sd, ray_length); /* clipping works through transparent */ + ray->t -= sd->ray_length; /* clipping works through transparent */ else ray->t = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - ray->dP = ccl_fetch(sd, dP); + ray->dP = sd->dP; ray->dD = bsdf_domega_in; #endif @@ -291,21 +291,21 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, return true; } #ifdef __VOLUME__ - else if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME) { + else if(sd->flag & SD_HAS_ONLY_VOLUME) { /* no surface shader but have a volume shader? 
act transparent */ /* update path state, count as transparent */ path_state_next(kg, state, LABEL_TRANSPARENT); if(state->bounce == 0) - ray->t -= ccl_fetch(sd, ray_length); /* clipping works through transparent */ + ray->t -= sd->ray_length; /* clipping works through transparent */ else ray->t = FLT_MAX; /* setup ray position, direction stays unchanged */ - ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng)); + ray->P = ray_offset(sd->P, -sd->Ng); #ifdef __RAY_DIFFERENTIALS__ - ray->dP = ccl_fetch(sd, dP); + ray->dP = sd->dP; #endif /* enter/exit volume */ diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h index cf5614b8a86..2e63909a38c 100644 --- a/intern/cycles/kernel/kernel_queues.h +++ b/intern/cycles/kernel/kernel_queues.h @@ -17,6 +17,8 @@ #ifndef __KERNEL_QUEUE_H__ #define __KERNEL_QUEUE_H__ +CCL_NAMESPACE_BEGIN + /* * Queue utility functions for split kernel */ @@ -35,7 +37,8 @@ ccl_device void enqueue_ray_index( ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ { /* This thread's queue index. */ - int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size); + int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + + (queue_number * queue_size); queues[my_queue_index] = ray_index; } @@ -47,6 +50,7 @@ ccl_device void enqueue_ray_index( * is no more ray to allocate to other threads. */ ccl_device int get_ray_index( + KernelGlobals *kg, int thread_index, /* Global thread index. */ int queue_number, /* Queue to operate on. */ ccl_global int *queues, /* Buffer of all queues. */ @@ -68,24 +72,25 @@ ccl_device void enqueue_ray_index_local( int queue_number, /* Queue in which to enqueue ray index. */ char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ int queuesize, /* queue size. */ - ccl_local unsigned int *local_queue_atomics, /* To to local queue atomics. 
*/ + ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */ ccl_global int *Queue_data, /* Queues. */ ccl_global int *Queue_index) /* To do global queue atomics. */ { - int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); /* Get local queue id .*/ unsigned int lqidx; if(enqueue_flag) { - lqidx = atomic_inc(local_queue_atomics); + lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics); } - barrier(CLK_LOCAL_MEM_FENCE); + ccl_barrier(CCL_LOCAL_MEM_FENCE); /* Get global queue offset. */ if(lidx == 0) { - *local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics); + *local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number], + *local_queue_atomics); } - barrier(CLK_LOCAL_MEM_FENCE); + ccl_barrier(CCL_LOCAL_MEM_FENCE); /* Get global queue index and enqueue ray. */ if(enqueue_flag) { @@ -96,19 +101,19 @@ ccl_device void enqueue_ray_index_local( ccl_device unsigned int get_local_queue_index( int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ - ccl_local unsigned int *local_queue_atomics) + ccl_local_param unsigned int *local_queue_atomics) { - int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]); + int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]); return my_lqidx; } ccl_device unsigned int get_global_per_queue_offset( int queue_number, - ccl_local unsigned int *local_queue_atomics, + ccl_local_param unsigned int *local_queue_atomics, ccl_global int* global_queue_atomics) { - unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number], - local_queue_atomics[queue_number]); + unsigned int queue_offset = atomic_fetch_and_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number], + local_queue_atomics[queue_number]); return queue_offset; } @@ -116,10 +121,12 @@ ccl_device unsigned int get_global_queue_index( int 
queue_number, int queuesize, unsigned int lqidx, - ccl_local unsigned int * global_per_queue_offset) + ccl_local_param unsigned int * global_per_queue_offset) { int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; return my_gqidx; } +CCL_NAMESPACE_END + #endif // __KERNEL_QUEUE_H__ diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index d0826e5e879..a2ab96b35e2 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -38,13 +38,13 @@ CCL_NAMESPACE_BEGIN #ifdef __OBJECT_MOTION__ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) { - if(ccl_fetch(sd, object_flag) & SD_OBJECT_MOTION) { - ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time); - ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm)); + if(sd->object_flag & SD_OBJECT_MOTION) { + sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); + sd->ob_itfm = transform_quick_inverse(sd->ob_tfm); } else { - ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); - ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); + sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); } } #endif @@ -55,55 +55,55 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, const Ray *ray) { #ifdef __INSTANCING__ - ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; + sd->object = (isect->object == PRIM_NONE)? 
kernel_tex_fetch(__prim_object, isect->prim): isect->object; #endif - ccl_fetch(sd, type) = isect->type; - ccl_fetch(sd, flag) = 0; - ccl_fetch(sd, object_flag) = kernel_tex_fetch(__object_flag, - ccl_fetch(sd, object)); + sd->type = isect->type; + sd->flag = 0; + sd->object_flag = kernel_tex_fetch(__object_flag, + sd->object); /* matrices and time */ #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, ray->time); - ccl_fetch(sd, time) = ray->time; + sd->time = ray->time; #endif - ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim); - ccl_fetch(sd, ray_length) = isect->t; + sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + sd->ray_length = isect->t; #ifdef __UV__ - ccl_fetch(sd, u) = isect->u; - ccl_fetch(sd, v) = isect->v; + sd->u = isect->u; + sd->v = isect->v; #endif #ifdef __HAIR__ - if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + if(sd->type & PRIMITIVE_ALL_CURVE) { /* curve */ - float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - ccl_fetch(sd, shader) = __float_as_int(curvedata.z); - ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray); + sd->shader = __float_as_int(curvedata.z); + sd->P = bvh_curve_refine(kg, sd, isect, ray); } else #endif - if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { + if(sd->type & PRIMITIVE_TRIANGLE) { /* static triangle */ float3 Ng = triangle_normal(kg, sd); - ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim)); + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); /* vectors */ - ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray); - ccl_fetch(sd, Ng) = Ng; - ccl_fetch(sd, N) = Ng; + sd->P = triangle_refine(kg, sd, isect, ray); + sd->Ng = Ng; + sd->N = Ng; /* smooth normal */ - if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) - ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v)); + if(sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = 
triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); #ifdef __DPDU__ /* dPdu/dPdv */ - triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv)); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #endif } else { @@ -111,40 +111,40 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, motion_triangle_shader_setup(kg, sd, isect, ray, false); } - ccl_fetch(sd, I) = -ray->D; + sd->I = -ray->D; - ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE); + sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); #ifdef __INSTANCING__ if(isect->object != OBJECT_NONE) { /* instance transform */ - object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N)); - object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng)); + object_normal_transform_auto(kg, sd, &sd->N); + object_normal_transform_auto(kg, sd, &sd->Ng); # ifdef __DPDU__ - object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu)); - object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv)); + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); # endif } #endif /* backfacing test */ - bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f); + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); if(backfacing) { - ccl_fetch(sd, flag) |= SD_BACKFACING; - ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng); - ccl_fetch(sd, N) = -ccl_fetch(sd, N); + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; #ifdef __DPDU__ - ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu); - ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv); + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; #endif } #ifdef __RAY_DIFFERENTIALS__ /* differentials */ - differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t); - differential_incoming(&ccl_fetch(sd, dI), ray->dD); - differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), 
ccl_fetch(sd, dP), ccl_fetch(sd, Ng)); + differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t); + differential_incoming(&sd->dI, ray->dD); + differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); #endif } @@ -249,106 +249,106 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, int lamp) { /* vectors */ - ccl_fetch(sd, P) = P; - ccl_fetch(sd, N) = Ng; - ccl_fetch(sd, Ng) = Ng; - ccl_fetch(sd, I) = I; - ccl_fetch(sd, shader) = shader; + sd->P = P; + sd->N = Ng; + sd->Ng = Ng; + sd->I = I; + sd->shader = shader; if(prim != PRIM_NONE) - ccl_fetch(sd, type) = PRIMITIVE_TRIANGLE; + sd->type = PRIMITIVE_TRIANGLE; else if(lamp != LAMP_NONE) - ccl_fetch(sd, type) = PRIMITIVE_LAMP; + sd->type = PRIMITIVE_LAMP; else - ccl_fetch(sd, type) = PRIMITIVE_NONE; + sd->type = PRIMITIVE_NONE; /* primitive */ #ifdef __INSTANCING__ - ccl_fetch(sd, object) = object; + sd->object = object; #endif /* currently no access to bvh prim index for strand sd->prim*/ - ccl_fetch(sd, prim) = prim; + sd->prim = prim; #ifdef __UV__ - ccl_fetch(sd, u) = u; - ccl_fetch(sd, v) = v; + sd->u = u; + sd->v = v; #endif - ccl_fetch(sd, ray_length) = t; + sd->ray_length = t; - ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE); - ccl_fetch(sd, object_flag) = 0; - if(ccl_fetch(sd, object) != OBJECT_NONE) { - ccl_fetch(sd, object_flag) |= kernel_tex_fetch(__object_flag, - ccl_fetch(sd, object)); + sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->object_flag = 0; + if(sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_tex_fetch(__object_flag, + sd->object); #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, time); - ccl_fetch(sd, time) = time; + sd->time = time; } else if(lamp != LAMP_NONE) { - ccl_fetch(sd, ob_tfm) = lamp_fetch_transform(kg, lamp, false); - ccl_fetch(sd, ob_itfm) = lamp_fetch_transform(kg, lamp, true); + sd->ob_tfm = 
lamp_fetch_transform(kg, lamp, false); + sd->ob_itfm = lamp_fetch_transform(kg, lamp, true); #endif } /* transform into world space */ if(object_space) { - object_position_transform_auto(kg, sd, &ccl_fetch(sd, P)); - object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng)); - ccl_fetch(sd, N) = ccl_fetch(sd, Ng); - object_dir_transform_auto(kg, sd, &ccl_fetch(sd, I)); + object_position_transform_auto(kg, sd, &sd->P); + object_normal_transform_auto(kg, sd, &sd->Ng); + sd->N = sd->Ng; + object_dir_transform_auto(kg, sd, &sd->I); } - if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { + if(sd->type & PRIMITIVE_TRIANGLE) { /* smooth normal */ - if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { - ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v)); + if(sd->shader & SHADER_SMOOTH_NORMAL) { + sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); #ifdef __INSTANCING__ - if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) { - object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N)); + if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform_auto(kg, sd, &sd->N); } #endif } /* dPdu/dPdv */ #ifdef __DPDU__ - triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv)); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); # ifdef __INSTANCING__ - if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) { - object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu)); - object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv)); + if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); } # endif #endif } else { #ifdef __DPDU__ - ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f); - ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); #endif } /* backfacing test */ - if(ccl_fetch(sd, 
prim) != PRIM_NONE) { - bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f); + if(sd->prim != PRIM_NONE) { + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); if(backfacing) { - ccl_fetch(sd, flag) |= SD_BACKFACING; - ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng); - ccl_fetch(sd, N) = -ccl_fetch(sd, N); + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; #ifdef __DPDU__ - ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu); - ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv); + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; #endif } } #ifdef __RAY_DIFFERENTIALS__ /* no ray differentials here yet */ - ccl_fetch(sd, dP) = differential3_zero(); - ccl_fetch(sd, dI) = differential3_zero(); - ccl_fetch(sd, du) = differential_zero(); - ccl_fetch(sd, dv) = differential_zero(); + sd->dP = differential3_zero(); + sd->dI = differential3_zero(); + sd->du = differential_zero(); + sd->dv = differential_zero(); #endif } @@ -378,39 +378,39 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray) { /* vectors */ - ccl_fetch(sd, P) = ray->D; - ccl_fetch(sd, N) = -ray->D; - ccl_fetch(sd, Ng) = -ray->D; - ccl_fetch(sd, I) = -ray->D; - ccl_fetch(sd, shader) = kernel_data.background.surface_shader; - ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE); - ccl_fetch(sd, object_flag) = 0; + sd->P = ray->D; + sd->N = -ray->D; + sd->Ng = -ray->D; + sd->I = -ray->D; + sd->shader = kernel_data.background.surface_shader; + sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE); + sd->object_flag = 0; #ifdef __OBJECT_MOTION__ - ccl_fetch(sd, time) = ray->time; + sd->time = ray->time; #endif - ccl_fetch(sd, ray_length) = 0.0f; + sd->ray_length = 0.0f; #ifdef __INSTANCING__ - ccl_fetch(sd, object) = PRIM_NONE; + sd->object = PRIM_NONE; #endif - ccl_fetch(sd, prim) = PRIM_NONE; + sd->prim = 
PRIM_NONE; #ifdef __UV__ - ccl_fetch(sd, u) = 0.0f; - ccl_fetch(sd, v) = 0.0f; + sd->u = 0.0f; + sd->v = 0.0f; #endif #ifdef __DPDU__ /* dPdu/dPdv */ - ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f); - ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); #endif #ifdef __RAY_DIFFERENTIALS__ /* differentials */ - ccl_fetch(sd, dP) = ray->dD; - differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP)); - ccl_fetch(sd, du) = differential_zero(); - ccl_fetch(sd, dv) = differential_zero(); + sd->dP = ray->dD; + differential_incoming(&sd->dI, sd->dP); + sd->du = differential_zero(); + sd->dv = differential_zero(); #endif } @@ -505,11 +505,11 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd { /* this is the veach one-sample model with balance heuristic, some pdf * factors drop out when using balance heuristic weighting */ - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { + for(int i = 0; i < sd->num_closure; i++) { if(i == skip_bsdf) continue; - const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + const ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF(sc->type)) { float bsdf_pdf = 0.0f; @@ -535,8 +535,8 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, float light_pdf, bool use_mis) { - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF(sc->type)) { float bsdf_pdf = 0.0f; float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); @@ -591,22 +591,22 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, { int sampled = 0; - if(ccl_fetch(sd, num_closure) > 1) { + if(sd->num_closure > 1) { /* pick a BSDF closure based on sample weights */ float sum = 0.0f; - for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) 
{ - const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); + for(sampled = 0; sampled < sd->num_closure; sampled++) { + const ShaderClosure *sc = &sd->closure[sampled]; if(CLOSURE_IS_BSDF(sc->type)) sum += sc->sample_weight; } - float r = ccl_fetch(sd, randb_closure)*sum; + float r = sd->randb_closure*sum; sum = 0.0f; - for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) { - const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); + for(sampled = 0; sampled < sd->num_closure; sampled++) { + const ShaderClosure *sc = &sd->closure[sampled]; if(CLOSURE_IS_BSDF(sc->type)) { sum += sc->sample_weight; @@ -616,13 +616,13 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, } } - if(sampled == ccl_fetch(sd, num_closure)) { + if(sampled == sd->num_closure) { *pdf = 0.0f; return LABEL_NONE; } } - const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); + const ShaderClosure *sc = &sd->closure[sampled]; int label; float3 eval; @@ -633,7 +633,7 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, if(*pdf != 0.0f) { bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass); - if(ccl_fetch(sd, num_closure) > 1) { + if(sd->num_closure > 1) { float sweight = sc->sample_weight; _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight); } @@ -660,8 +660,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd, ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness) { - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF(sc->type)) bsdf_blur(kg, sc, roughness); @@ -670,13 +670,13 @@ ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughn ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd) { - 
if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME) + if(sd->flag & SD_HAS_ONLY_VOLUME) return make_float3(1.0f, 1.0f, 1.0f); float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl eval += sc->weight; @@ -685,6 +685,18 @@ ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd) return eval; } +ccl_device void shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd) +{ + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = make_float3(0.0f, 0.0f, 0.0f); + } + } +} + ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd) { float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd); @@ -699,8 +711,8 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) eval += sc->weight; @@ -713,8 +725,8 @@ ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) eval += sc->weight; @@ -727,8 +739,8 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, 
num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) eval += sc->weight; @@ -741,8 +753,8 @@ ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) { float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) eval += sc->weight; @@ -756,8 +768,8 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac float3 eval = make_float3(0.0f, 0.0f, 0.0f); float3 N = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; @@ -766,12 +778,12 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac } else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) { eval += sc->weight; - N += ccl_fetch(sd, N)*average(sc->weight); + N += sd->N*average(sc->weight); } } if(is_zero(N)) - N = ccl_fetch(sd, N); + N = sd->N; else N = normalize(N); @@ -786,8 +798,8 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b float3 N = make_float3(0.0f, 0.0f, 0.0f); float texture_blur = 0.0f, weight_sum = 0.0f; - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSSRDF(sc->type)) { const Bssrdf *bssrdf = (const Bssrdf*)sc; @@ -801,7 +813,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, 
float *texture_b } if(N_) - *N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N); + *N_ = (is_zero(N))? sd->N: normalize(N); if(texture_blur_) *texture_blur_ = texture_blur/weight_sum; @@ -814,7 +826,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc) { - return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I)); + return emissive_simple_eval(sd->Ng, sd->I); } ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) @@ -822,8 +834,8 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) float3 eval; eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_EMISSION(sc->type)) eval += emissive_eval(kg, sd, sc)*sc->weight; @@ -838,8 +850,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) { float3 weight = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_HOLDOUT(sc->type)) weight += sc->weight; @@ -853,9 +865,9 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng, ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx) { - ccl_fetch(sd, num_closure) = 0; - ccl_fetch(sd, num_closure_extra) = 0; - ccl_fetch(sd, randb_closure) = randb; + sd->num_closure = 0; + sd->num_closure_extra = 0; + sd->randb_closure = randb; #ifdef __OSL__ if(kg->osl) @@ -869,13 +881,13 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_ DiffuseBsdf *bsdf = 
(DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); - bsdf->N = ccl_fetch(sd, N); - ccl_fetch(sd, flag) |= bsdf_diffuse_setup(bsdf); + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); #endif } - if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) { - ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953); + if(rng && (sd->flag & SD_BSDF_NEEDS_LCG)) { + sd->lcg_state = lcg_state_init_addrspace(rng, state, 0xb4bc3953); } } @@ -884,9 +896,9 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, int path_flag, ShaderContext ctx) { - ccl_fetch(sd, num_closure) = 0; - ccl_fetch(sd, num_closure_extra) = 0; - ccl_fetch(sd, randb_closure) = 0.0f; + sd->num_closure = 0; + sd->num_closure_extra = 0; + sd->randb_closure = 0.0f; #ifdef __SVM__ #ifdef __OSL__ @@ -901,8 +913,8 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { - const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); + for(int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BACKGROUND(sc->type)) eval += sc->weight; @@ -1081,9 +1093,9 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg, ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx) { - ccl_fetch(sd, num_closure) = 0; - ccl_fetch(sd, num_closure_extra) = 0; - ccl_fetch(sd, randb_closure) = 0.0f; + sd->num_closure = 0; + sd->num_closure_extra = 0; + sd->randb_closure = 0.0f; /* this will modify sd->P */ #ifdef __SVM__ diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 06a77a208cb..2483c5f9ae1 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ 
b/intern/cycles/kernel/kernel_shadow.h @@ -45,7 +45,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect( /* Setup shader data at surface. */ shader_setup_from_ray(kg, shadow_sd, isect, ray); /* Attenuation from transparent surface. */ - if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) { + if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { path_state_modify_bounce(state, true); shader_eval_surface(kg, shadow_sd, @@ -180,7 +180,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, return true; } /* Move ray forward. */ - ray->P = ccl_fetch(shadow_sd, P); + ray->P = shadow_sd->P; if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } @@ -248,7 +248,7 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, } # endif /* __SHADOW_RECORD_ALL__ */ -# ifdef __KERNEL_GPU__ +# if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__) /* Shadow function to compute how much light is blocked, * * Here we raytrace from one transparent surface to the next step by step. @@ -308,7 +308,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( return true; } /* Move ray forward. 
*/ - ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng)); + ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); if(ray->t != FLT_MAX) { ray->D = normalize_len(Pend - ray->P, &ray->t); } @@ -359,7 +359,7 @@ ccl_device bool shadow_blocked_transparent_stepped( shadow); } -# endif /* __KERNEL_GPU__ */ +# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */ #endif /* __TRANSPARENT_SHADOWS__ */ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, @@ -374,7 +374,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, #ifdef __SPLIT_KERNEL__ Ray private_ray = *ray_input; Ray *ray = &private_ray; - Intersection *isect = &kg->isect_shadow[SD_THREAD]; + Intersection *isect = &kernel_split_state.isect_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; #else /* __SPLIT_KERNEL__ */ Ray *ray = ray_input; Intersection isect_object; diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 52c05b85aee..a8fa6432542 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -298,20 +298,20 @@ ccl_device_inline int subsurface_scatter_multi_intersect( for(int hit = 0; hit < num_eval_hits; hit++) { /* Quickly retrieve P and Ng without setting up ShaderData. 
*/ float3 hit_P; - if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { + if(sd->type & PRIMITIVE_TRIANGLE) { hit_P = triangle_refine_subsurface(kg, sd, &ss_isect->hits[hit], ray); } #ifdef __OBJECT_MOTION__ - else if(ccl_fetch(sd, type) & PRIMITIVE_MOTION_TRIANGLE) { + else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) { float3 verts[3]; motion_triangle_vertices( kg, - ccl_fetch(sd, object), + sd->object, kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim), - ccl_fetch(sd, time), + sd->time, verts); hit_P = motion_triangle_refine_subsurface(kg, sd, diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 8d5bb75a428..cb1a3f40dee 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -32,6 +32,7 @@ KERNEL_TEX(uint, texture_uint, __prim_visibility) KERNEL_TEX(uint, texture_uint, __prim_index) KERNEL_TEX(uint, texture_uint, __prim_object) KERNEL_TEX(uint, texture_uint, __object_node) +KERNEL_TEX(float2, texture_float2, __prim_time) /* objects */ KERNEL_TEX(float4, texture_float4, __objects) @@ -177,7 +178,6 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_085) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_086) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_087) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_088) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_089) # else /* bindless textures */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index f518530106c..a7faaef89ca 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -32,6 +32,11 @@ # define ccl_addr_space #endif +#if defined(__SPLIT_KERNEL__) && !defined(__COMPUTE_DEVICE_GPU__) +/* TODO(mai): need to investigate how this effects the kernel, as cpu kernel crashes without this right now */ +#define __COMPUTE_DEVICE_GPU__ +#endif + CCL_NAMESPACE_BEGIN /* constants 
*/ @@ -56,6 +61,8 @@ CCL_NAMESPACE_BEGIN #define VOLUME_STACK_SIZE 16 +#define WORK_POOL_SIZE 64 + /* device capabilities */ #ifdef __KERNEL_CPU__ # ifdef __KERNEL_SSE2__ @@ -63,28 +70,36 @@ CCL_NAMESPACE_BEGIN # endif # define __KERNEL_SHADING__ # define __KERNEL_ADV_SHADING__ -# define __BRANCHED_PATH__ +# ifndef __SPLIT_KERNEL__ +# define __BRANCHED_PATH__ +# endif # ifdef WITH_OSL # define __OSL__ # endif -# define __SUBSURFACE__ +# ifndef __SPLIT_KERNEL__ +# define __SUBSURFACE__ +# endif # define __CMJ__ -# define __VOLUME__ -# define __VOLUME_DECOUPLED__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __VOLUME_RECORD_ALL__ +# ifndef __SPLIT_KERNEL__ +# define __VOLUME__ +# define __VOLUME_DECOUPLED__ +# define __VOLUME_SCATTER__ +# define __SHADOW_RECORD_ALL__ +# define __VOLUME_RECORD_ALL__ +# endif #endif /* __KERNEL_CPU__ */ #ifdef __KERNEL_CUDA__ # define __KERNEL_SHADING__ # define __KERNEL_ADV_SHADING__ -# define __BRANCHED_PATH__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __SUBSURFACE__ -# define __CMJ__ -# define __SHADOW_RECORD_ALL__ +# ifndef __SPLIT_KERNEL__ +# define __BRANCHED_PATH__ +# define __VOLUME__ +# define __VOLUME_SCATTER__ +# define __SUBSURFACE__ +# define __CMJ__ +# define __SHADOW_RECORD_ALL__ +# endif #endif /* __KERNEL_CUDA__ */ #ifdef __KERNEL_OPENCL__ @@ -798,99 +813,77 @@ enum ShaderDataObjectFlag { SD_OBJECT_INTERSECTS_VOLUME) }; -#ifdef __SPLIT_KERNEL__ -# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0)) -# if !defined(__SPLIT_KERNEL_SOA__) - /* ShaderData is stored as an Array-of-Structures */ -# define ccl_soa_member(type, name) type soa_##name -# define ccl_fetch(s, t) (s[SD_THREAD].soa_##t) -# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index]) -# else - /* ShaderData is stored as an Structure-of-Arrays */ -# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1)) -# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t) -# 
define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0) -# define ccl_soa_member(type, name) type soa_##name -# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(soa_##t) + SD_FIELD_SIZE(soa_##t) * SD_THREAD - SD_OFFSETOF(soa_##t)))->soa_##t) -# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index]) -# endif -#else -# define ccl_soa_member(type, name) type name -# define ccl_fetch(s, t) (s->t) -# define ccl_fetch_array(s, t, index) (&s->t[index]) -#endif - typedef ccl_addr_space struct ShaderData { /* position */ - ccl_soa_member(float3, P); + float3 P; /* smooth normal for shading */ - ccl_soa_member(float3, N); + float3 N; /* true geometric normal */ - ccl_soa_member(float3, Ng); + float3 Ng; /* view/incoming direction */ - ccl_soa_member(float3, I); + float3 I; /* shader id */ - ccl_soa_member(int, shader); + int shader; /* booleans describing shader, see ShaderDataFlag */ - ccl_soa_member(int, flag); + int flag; /* booleans describing object of the shader, see ShaderDataObjectFlag */ - ccl_soa_member(int, object_flag); + int object_flag; /* primitive id if there is one, ~0 otherwise */ - ccl_soa_member(int, prim); + int prim; /* combined type and curve segment for hair */ - ccl_soa_member(int, type); + int type; /* parametric coordinates * - barycentric weights for triangles */ - ccl_soa_member(float, u); - ccl_soa_member(float, v); + float u; + float v; /* object id if there is one, ~0 otherwise */ - ccl_soa_member(int, object); + int object; /* motion blur sample time */ - ccl_soa_member(float, time); + float time; /* length of the ray being shaded */ - ccl_soa_member(float, ray_length); + float ray_length; #ifdef __RAY_DIFFERENTIALS__ /* differential of P. 
these are orthogonal to Ng, not N */ - ccl_soa_member(differential3, dP); + differential3 dP; /* differential of I */ - ccl_soa_member(differential3, dI); + differential3 dI; /* differential of u, v */ - ccl_soa_member(differential, du); - ccl_soa_member(differential, dv); + differential du; + differential dv; #endif #ifdef __DPDU__ /* differential of P w.r.t. parametric coordinates. note that dPdu is * not readily suitable as a tangent for shading on triangles. */ - ccl_soa_member(float3, dPdu); - ccl_soa_member(float3, dPdv); + float3 dPdu; + float3 dPdv; #endif #ifdef __OBJECT_MOTION__ /* object <-> world space transformations, cached to avoid * re-interpolating them constantly for shading */ - ccl_soa_member(Transform, ob_tfm); - ccl_soa_member(Transform, ob_itfm); + Transform ob_tfm; + Transform ob_itfm; #endif /* Closure data, we store a fixed array of closures */ - ccl_soa_member(struct ShaderClosure, closure[MAX_CLOSURE]); - ccl_soa_member(int, num_closure); - ccl_soa_member(int, num_closure_extra); - ccl_soa_member(float, randb_closure); - ccl_soa_member(float3, svm_closure_weight); + struct ShaderClosure closure[MAX_CLOSURE]; + int num_closure; + int num_closure_extra; + float randb_closure; + float3 svm_closure_weight; /* LCG state for closures that require additional random numbers. 
*/ - ccl_soa_member(uint, lcg_state); + uint lcg_state; /* ray start position, only set for backgrounds */ - ccl_soa_member(float3, ray_P); - ccl_soa_member(differential3, ray_dP); + float3 ray_P; + differential3 ray_dP; #ifdef __OSL__ struct KernelGlobals *osl_globals; @@ -1202,7 +1195,8 @@ typedef struct KernelBVH { int have_curves; int have_instancing; int use_qbvh; - int pad1, pad2; + int use_bvh_steps; + int pad1; } KernelBVH; static_assert_align(KernelBVH, 16); diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index c7cb29b5af2..10d0d185345 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -966,7 +966,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( mis_weight = 2.0f*power_heuristic(pdf, distance_pdf); } } - if(sample_t < 1e-6f) { + if(sample_t < 1e-6f || pdf == 0.0f) { return VOLUME_PATH_SCATTERED; } diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h index 7d559b1aa31..28fc5ce1c30 100644 --- a/intern/cycles/kernel/kernel_work_stealing.h +++ b/intern/cycles/kernel/kernel_work_stealing.h @@ -17,177 +17,102 @@ #ifndef __KERNEL_WORK_STEALING_H__ #define __KERNEL_WORK_STEALING_H__ +CCL_NAMESPACE_BEGIN + /* * Utility functions for work stealing */ -#ifdef __WORK_STEALING__ - #ifdef __KERNEL_OPENCL__ # pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #endif -uint get_group_id_with_ray_index(uint ray_index, - uint tile_dim_x, - uint tile_dim_y, - uint parallel_samples, - int dim) +ccl_device_inline uint kernel_total_work_size(KernelGlobals *kg) +{ + return kernel_split_params.w * kernel_split_params.h * kernel_split_params.num_samples; +} + +ccl_device_inline uint kernel_num_work_pools(KernelGlobals *kg) +{ + return ccl_global_size(0) * ccl_global_size(1) / WORK_POOL_SIZE; +} + +ccl_device_inline uint work_pool_from_ray_index(KernelGlobals *kg, uint ray_index) +{ + return ray_index / 
WORK_POOL_SIZE; +} + +ccl_device_inline uint work_pool_work_size(KernelGlobals *kg, uint work_pool) { - if(dim == 0) { - uint x_span = ray_index % (tile_dim_x * parallel_samples); - return x_span / get_local_size(0); + uint total_work_size = kernel_total_work_size(kg); + uint num_pools = kernel_num_work_pools(kg); + + if(work_pool >= num_pools || work_pool * WORK_POOL_SIZE >= total_work_size) { + return 0; + } + + uint work_size = (total_work_size / (num_pools * WORK_POOL_SIZE)) * WORK_POOL_SIZE; + + uint remainder = (total_work_size % (num_pools * WORK_POOL_SIZE)); + if(work_pool < remainder / WORK_POOL_SIZE) { + work_size += WORK_POOL_SIZE; } - else /*if(dim == 1)*/ { - kernel_assert(dim == 1); - uint y_span = ray_index / (tile_dim_x * parallel_samples); - return y_span / get_local_size(1); + else if(work_pool == remainder / WORK_POOL_SIZE) { + work_size += remainder % WORK_POOL_SIZE; } + + return work_size; } -uint get_total_work(uint tile_dim_x, - uint tile_dim_y, - uint grp_idx, - uint grp_idy, - uint num_samples) +ccl_device_inline uint get_global_work_index(KernelGlobals *kg, uint work_index, uint ray_index) { - uint threads_within_tile_border_x = - (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) - : get_local_size(0); - uint threads_within_tile_border_y = - (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) - : get_local_size(1); - - threads_within_tile_border_x = - (threads_within_tile_border_x == 0) ? get_local_size(0) - : threads_within_tile_border_x; - threads_within_tile_border_y = - (threads_within_tile_border_y == 0) ? 
get_local_size(1) - : threads_within_tile_border_y; - - return threads_within_tile_border_x * - threads_within_tile_border_y * - num_samples; + uint num_pools = kernel_num_work_pools(kg); + uint pool = work_pool_from_ray_index(kg, ray_index); + + return (work_index / WORK_POOL_SIZE) * (num_pools * WORK_POOL_SIZE) + + (pool * WORK_POOL_SIZE) + + (work_index % WORK_POOL_SIZE); } -/* Returns 0 in case there is no next work available */ -/* Returns 1 in case work assigned is valid */ -int get_next_work(ccl_global uint *work_pool, - ccl_private uint *my_work, - uint tile_dim_x, - uint tile_dim_y, - uint num_samples, - uint parallel_samples, - uint ray_index) +/* Returns true if there is work */ +ccl_device bool get_next_work(KernelGlobals *kg, ccl_private uint *work_index, uint ray_index) { - uint grp_idx = get_group_id_with_ray_index(ray_index, - tile_dim_x, - tile_dim_y, - parallel_samples, - 0); - uint grp_idy = get_group_id_with_ray_index(ray_index, - tile_dim_x, - tile_dim_y, - parallel_samples, - 1); - uint total_work = get_total_work(tile_dim_x, - tile_dim_y, - grp_idx, - grp_idy, - num_samples); - uint group_index = grp_idy * get_num_groups(0) + grp_idx; - *my_work = atomic_inc(&work_pool[group_index]); - return (*my_work < total_work) ? 1 : 0; + uint work_pool = work_pool_from_ray_index(kg, ray_index); + uint pool_size = work_pool_work_size(kg, work_pool); + + if(pool_size == 0) { + return false; + } + + *work_index = atomic_fetch_and_inc_uint32(&kernel_split_params.work_pools[work_pool]); + return (*work_index < pool_size); } -/* This function assumes that the passed my_work is valid. */ -/* Decode sample number w.r.t. assigned my_work. */ -uint get_my_sample(uint my_work, - uint tile_dim_x, - uint tile_dim_y, - uint parallel_samples, - uint ray_index) +/* This function assumes that the passed `work` is valid. */ +/* Decode sample number w.r.t. assigned `work`. 
*/ +ccl_device uint get_work_sample(KernelGlobals *kg, uint work_index, uint ray_index) { - uint grp_idx = get_group_id_with_ray_index(ray_index, - tile_dim_x, - tile_dim_y, - parallel_samples, - 0); - uint grp_idy = get_group_id_with_ray_index(ray_index, - tile_dim_x, - tile_dim_y, - parallel_samples, - 1); - uint threads_within_tile_border_x = - (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) - : get_local_size(0); - uint threads_within_tile_border_y = - (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) - : get_local_size(1); - - threads_within_tile_border_x = - (threads_within_tile_border_x == 0) ? get_local_size(0) - : threads_within_tile_border_x; - threads_within_tile_border_y = - (threads_within_tile_border_y == 0) ? get_local_size(1) - : threads_within_tile_border_y; - - return my_work / - (threads_within_tile_border_x * threads_within_tile_border_y); + return get_global_work_index(kg, work_index, ray_index) / (kernel_split_params.w * kernel_split_params.h); } -/* Decode pixel and tile position w.r.t. assigned my_work. */ -void get_pixel_tile_position(ccl_private uint *pixel_x, +/* Decode pixel and tile position w.r.t. assigned `work`. */ +ccl_device void get_work_pixel_tile_position(KernelGlobals *kg, + ccl_private uint *pixel_x, ccl_private uint *pixel_y, ccl_private uint *tile_x, ccl_private uint *tile_y, - uint my_work, - uint tile_dim_x, - uint tile_dim_y, - uint tile_offset_x, - uint tile_offset_y, - uint parallel_samples, + uint work_index, uint ray_index) { - uint grp_idx = get_group_id_with_ray_index(ray_index, - tile_dim_x, - tile_dim_y, - parallel_samples, - 0); - uint grp_idy = get_group_id_with_ray_index(ray_index, - tile_dim_x, - tile_dim_y, - parallel_samples, - 1); - uint threads_within_tile_border_x = - (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) - : get_local_size(0); - uint threads_within_tile_border_y = - (grp_idy == (get_num_groups(1) - 1)) ? 
tile_dim_y % get_local_size(1) - : get_local_size(1); - - threads_within_tile_border_x = - (threads_within_tile_border_x == 0) ? get_local_size(0) - : threads_within_tile_border_x; - threads_within_tile_border_y = - (threads_within_tile_border_y == 0) ? get_local_size(1) - : threads_within_tile_border_y; - - uint total_associated_pixels = - threads_within_tile_border_x * threads_within_tile_border_y; - uint work_group_pixel_index = my_work % total_associated_pixels; - uint work_group_pixel_x = - work_group_pixel_index % threads_within_tile_border_x; - uint work_group_pixel_y = - work_group_pixel_index / threads_within_tile_border_x; - - *pixel_x = - tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x; - *pixel_y = - tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y; - *tile_x = *pixel_x - tile_offset_x; - *tile_y = *pixel_y - tile_offset_y; + uint pixel_index = get_global_work_index(kg, work_index, ray_index) % (kernel_split_params.w*kernel_split_params.h); + + *tile_x = pixel_index % kernel_split_params.w; + *tile_y = pixel_index / kernel_split_params.w; + + *pixel_x = *tile_x + kernel_split_params.x; + *pixel_y = *tile_y + kernel_split_params.y; } -#endif /* __WORK_STEALING__ */ +CCL_NAMESPACE_END #endif /* __KERNEL_WORK_STEALING_H__ */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 1a07c705f1c..deb872444d0 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -49,4 +49,39 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample); +/* Split kernels */ + +void KERNEL_FUNCTION_FULL_NAME(data_init)( + KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + ccl_global uint *rng_state, + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, + ccl_global int 
*Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer); + +#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data); + +DECLARE_SPLIT_KERNEL_FUNCTION(path_init) +DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) +DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) + +void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)); + #undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index ec82d4b4c22..d6d0db4e034 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -21,17 +21,39 @@ */ #include "kernel_compat_cpu.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_cpu_image.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" + +#ifndef __SPLIT_KERNEL__ +# include "kernel_math.h" +# include "kernel_types.h" + +# include "split/kernel_split_data.h" +# include "kernel_globals.h" + +# include "kernel_cpu_image.h" +# include "kernel_film.h" +# include "kernel_path.h" +# include "kernel_path_branched.h" +# include "kernel_bake.h" +#else +# include "split/kernel_split_common.h" + +# include "split/kernel_data_init.h" +# include "split/kernel_path_init.h" +# include "split/kernel_scene_intersect.h" +# include "split/kernel_lamp_emission.h" +# include 
"split/kernel_queue_enqueue.h" +# include "split/kernel_background_buffer_update.h" +# include "split/kernel_shader_eval.h" +# include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" +# include "split/kernel_direct_lighting.h" +# include "split/kernel_shadow_blocked.h" +# include "split/kernel_next_iteration_setup.h" +#endif CCL_NAMESPACE_BEGIN +#ifndef __SPLIT_KERNEL__ + /* Path Tracing */ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, @@ -131,4 +153,55 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, } } +#else /* __SPLIT_KERNEL__ */ + +/* Split Kernel Path Tracing */ + +#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ + { \ + kernel_##name(kg); \ + } + +DEFINE_SPLIT_KERNEL_FUNCTION(path_init) +DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) +DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) + +void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) +{ +#define REGISTER_NAME_STRING(name) #name +#define REGISTER_EVAL_NAME(name) REGISTER_NAME_STRING(name) +#define REGISTER(name) reg(REGISTER_EVAL_NAME(KERNEL_FUNCTION_FULL_NAME(name)), (void*)KERNEL_FUNCTION_FULL_NAME(name)); + + REGISTER(path_trace); + REGISTER(convert_to_byte); + REGISTER(convert_to_half_float); + REGISTER(shader); + + REGISTER(data_init); + REGISTER(path_init); + REGISTER(scene_intersect); + REGISTER(lamp_emission); + REGISTER(queue_enqueue); + REGISTER(background_buffer_update); + REGISTER(shader_eval); + REGISTER(holdout_emission_blurring_pathtermination_ao); + REGISTER(direct_lighting); + 
REGISTER(shadow_blocked); + REGISTER(next_iteration_setup); + +#undef REGISTER +#undef REGISTER_EVAL_NAME +#undef REGISTER_NAME_STRING +} + +#endif /* __SPLIT_KERNEL__ */ + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp new file mode 100644 index 00000000000..30519dae53e --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CPU kernel entry points */ + +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this + * one with SSE2 intrinsics. + */ +#if defined(__x86_64__) || defined(_M_X64) +# define __KERNEL_SSE2__ +#endif + +#define __SPLIT_KERNEL__ + +/* When building kernel for native machine detect kernel features from the flags + * set by compiler. 
+ */ +#ifdef WITH_KERNEL_NATIVE +# ifdef __SSE2__ +# ifndef __KERNEL_SSE2__ +# define __KERNEL_SSE2__ +# endif +# endif +# ifdef __SSE3__ +# define __KERNEL_SSE3__ +# endif +# ifdef __SSSE3__ +# define __KERNEL_SSSE3__ +# endif +# ifdef __SSE4_1__ +# define __KERNEL_SSE41__ +# endif +# ifdef __AVX__ +# define __KERNEL_AVX__ +# endif +# ifdef __AVX2__ +# define __KERNEL_SSE__ +# define __KERNEL_AVX2__ +# endif +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + +#include "kernel.h" +#define KERNEL_ARCH cpu +#include "kernel_cpu_impl.h" + diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp new file mode 100644 index 00000000000..335ad24bdc5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# include "kernel.h" +# define KERNEL_ARCH cpu_avx +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp new file mode 100644 index 00000000000..765ba96aba3 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# define __KERNEL_AVX2__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# include "kernel.h" +# define KERNEL_ARCH cpu_avx2 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp new file mode 100644 index 00000000000..af244c03929 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse2 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp new file mode 100644 index 00000000000..d1b579eeac5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse3 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp new file mode 100644 index 00000000000..83d62de5aa5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse41 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 090ab2c50c2..52e541321e3 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -16,7 +16,10 @@ /* CUDA kernel entry points */ +#ifdef __CUDA_ARCH__ + #include "../../kernel_compat_cuda.h" +#include "kernel_config.h" #include "../../kernel_math.h" #include "../../kernel_types.h" #include "../../kernel_globals.h" @@ -25,104 +28,7 @@ #include "../../kernel_path_branched.h" #include "../../kernel_bake.h" -/* device data taken from CUDA occupancy calculator */ - -#ifdef __CUDA_ARCH__ - -/* 2.0 and 2.1 */ -#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 32 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 - -/* 3.0 and 3.5 */ -#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 63 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 3.2 */ -#elif __CUDA_ARCH__ == 320 -# define 
CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 63 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 3.7 */ -#elif __CUDA_ARCH__ == 370 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 255 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 63 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 5.0, 5.2, 5.3, 6.0, 6.1 */ -#elif __CUDA_ARCH__ >= 500 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 255 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 48 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* unknown architecture */ -#else -# error "Unknown or unsupported CUDA architecture, can't determine launch bounds" -#endif - -/* compute number of threads per block and minimum blocks per multiprocessor - * given the maximum number of registers per thread */ - -#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ - __launch_bounds__( \ - threads_block_width*threads_block_width, \ - CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \ - ) - -/* sanity checks */ - -#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS -# error "Maximum number of threads per block exceeded" -#endif - -#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS -# error "Maximum number of blocks per multiprocessor exceeded" -#endif - -#if CUDA_KERNEL_MAX_REGISTERS > 
CUDA_THREAD_MAX_REGISTERS -# error "Maximum number of registers per thread exceeded" -#endif - -#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS -# error "Maximum number of registers per thread exceeded" -#endif - /* kernels */ - extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h new file mode 100644 index 00000000000..9fa39dc9ebb --- /dev/null +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -0,0 +1,110 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* device data taken from CUDA occupancy calculator */ + +/* 2.0 and 2.1 */ +#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210 +# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 +# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8 +# define CUDA_BLOCK_MAX_THREADS 1024 +# define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +# define CUDA_THREADS_BLOCK_WIDTH 16 +# define CUDA_KERNEL_MAX_REGISTERS 32 +# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 + +/* 3.0 and 3.5 */ +#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 +# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +# define CUDA_BLOCK_MAX_THREADS 1024 +# define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +# define CUDA_THREADS_BLOCK_WIDTH 16 +# define CUDA_KERNEL_MAX_REGISTERS 63 +# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* 3.2 */ +#elif __CUDA_ARCH__ == 320 +# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 +# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +# define CUDA_BLOCK_MAX_THREADS 1024 +# define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +# define CUDA_THREADS_BLOCK_WIDTH 16 +# define CUDA_KERNEL_MAX_REGISTERS 63 +# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* 3.7 */ +#elif __CUDA_ARCH__ == 370 +# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +# define CUDA_BLOCK_MAX_THREADS 1024 +# define CUDA_THREAD_MAX_REGISTERS 255 + +/* tunable parameters */ +# define CUDA_THREADS_BLOCK_WIDTH 16 +# define CUDA_KERNEL_MAX_REGISTERS 63 +# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* 5.0, 5.2, 5.3, 6.0, 6.1 */ +#elif __CUDA_ARCH__ >= 500 +# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 +# define CUDA_BLOCK_MAX_THREADS 1024 +# define CUDA_THREAD_MAX_REGISTERS 255 + +/* tunable parameters */ +# define CUDA_THREADS_BLOCK_WIDTH 16 +# define CUDA_KERNEL_MAX_REGISTERS 48 +# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* unknown architecture */ +#else 
+# error "Unknown or unsupported CUDA architecture, can't determine launch bounds" +#endif + +/* compute number of threads per block and minimum blocks per multiprocessor + * given the maximum number of registers per thread */ + +#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ + __launch_bounds__( \ + threads_block_width*threads_block_width, \ + CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \ + ) + +/* sanity checks */ + +#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS +# error "Maximum number of threads per block exceeded" +#endif + +#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS +# error "Maximum number of blocks per multiprocessor exceeded" +#endif + +#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS +# error "Maximum number of registers per thread exceeded" +#endif + +#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS +# error "Maximum number of registers per thread exceeded" +#endif + diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu new file mode 100644 index 00000000000..759475b175f --- /dev/null +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -0,0 +1,125 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* CUDA split kernel entry points */ + +#ifdef __CUDA_ARCH__ + +#define __SPLIT_KERNEL__ + +#include "../../kernel_compat_cuda.h" +#include "kernel_config.h" + +#include "../../split/kernel_split_common.h" +#include "../../split/kernel_data_init.h" +#include "../../split/kernel_path_init.h" +#include "../../split/kernel_scene_intersect.h" +#include "../../split/kernel_lamp_emission.h" +#include "../../split/kernel_queue_enqueue.h" +#include "../../split/kernel_background_buffer_update.h" +#include "../../split/kernel_shader_eval.h" +#include "../../split/kernel_holdout_emission_blurring_pathtermination_ao.h" +#include "../../split/kernel_direct_lighting.h" +#include "../../split/kernel_shadow_blocked.h" +#include "../../split/kernel_next_iteration_setup.h" + +#include "../../kernel_film.h" + +/* kernels */ +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_state_buffer_size(uint num_threads, uint *size) +{ + *size = split_data_buffer_size(NULL, num_threads); +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_path_trace_data_init( + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + ccl_global uint *rng_state, + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, + ccl_global int *Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer) +{ + kernel_data_init(NULL, + NULL, + split_data_buffer, + num_elements, + ray_state, + rng_state, + start_sample, + end_sample, + sx, sy, sw, sh, offset, stride, + Queue_index, + queuesize, + use_queues_flag, + work_pool_wgs, + num_samples, + buffer); +} + +#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + extern "C" __global__ void \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + 
kernel_cuda_##name() \ + { \ + kernel_##name(NULL); \ + } + +DEFINE_SPLIT_KERNEL_FUNCTION(path_init) +DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) +DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); +} + +#endif + diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index a68f97857b6..52406d2f548 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -67,8 +67,8 @@ __kernel void kernel_ocl_path_trace( kg->name = name; #include "../../kernel_textures.h" - int x = sx + get_global_id(0); - int y = sy + get_global_id(1); + int x = sx + ccl_global_id(0); + int y = sy + 
ccl_global_id(1); if(x < sx + sw && y < sy + sh) kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); @@ -96,7 +96,7 @@ __kernel void kernel_ocl_shader( kg->name = name; #include "../../kernel_textures.h" - int x = sx + get_global_id(0); + int x = sx + ccl_global_id(0); if(x < sx + sw) { kernel_shader_evaluate(kg, @@ -128,7 +128,7 @@ __kernel void kernel_ocl_bake( kg->name = name; #include "../../kernel_textures.h" - int x = sx + get_global_id(0); + int x = sx + ccl_global_id(0); if(x < sx + sw) { #ifdef __NO_BAKING__ @@ -159,8 +159,8 @@ __kernel void kernel_ocl_convert_to_byte( kg->name = name; #include "../../kernel_textures.h" - int x = sx + get_global_id(0); - int y = sy + get_global_id(1); + int x = sx + ccl_global_id(0); + int y = sy + ccl_global_id(1); if(x < sx + sw && y < sy + sh) kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); @@ -186,11 +186,27 @@ __kernel void kernel_ocl_convert_to_half_float( kg->name = name; #include "../../kernel_textures.h" - int x = sx + get_global_id(0); - int y = sy + get_global_id(1); + int x = sx + ccl_global_id(0); + int y = sy + ccl_global_id(1); if(x < sx + sw && y < sy + sh) kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } +__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset) +{ + size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); + + if(i < size / sizeof(float4)) { + buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + else if(i == size / sizeof(float4)) { + ccl_global uchar *b = (ccl_global uchar*)&buffer[i+offset/sizeof(float4)]; + + for(i = 0; i < size % sizeof(float4); i++) { + *(b++) = 0; + } + } +} + #endif /* __COMPILE_ONLY_MEGAKERNEL__ */ diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl index 1914d241eb1..47e363f6e03 100644 --- 
a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl @@ -14,112 +14,13 @@ * limitations under the License. */ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_background_buffer_update.h" __kernel void kernel_ocl_path_trace_background_buffer_update( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global float *per_sample_output_buffers, - ccl_global uint *rng_state, - ccl_global uint *rng_coop, /* Required for buffer Update */ - ccl_global float3 *throughput_coop, /* Required for background hit processing */ - PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ - ccl_global Ray *Ray_coop, /* Required for background hit processing */ - ccl_global PathState *PathState_coop, /* Required for background hit processing */ - ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ - ccl_global char *ray_state, /* Stores information on the current state of a ray */ - int sw, int sh, int sx, int sy, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global unsigned int *work_array, /* Denotes work of each ray */ - ccl_global int *Queue_data, /* Queues memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ - int end_sample, - int start_sample, -#ifdef __WORK_STEALING__ - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int 
ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - if(ray_index == 0) { - /* We will empty this queue in this kernel. */ - Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } - char enqueue_flag = 0; - ray_index = get_ray_index(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - Queue_data, - queuesize, - 1); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - enqueue_flag = - kernel_background_buffer_update((KernelGlobals *)kg, - per_sample_output_buffers, - rng_state, - rng_coop, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - L_transparent_coop, - ray_state, - sw, sh, sx, sy, stride, - rng_state_offset_x, - rng_state_offset_y, - rng_state_stride, - work_array, - end_sample, - start_sample, -#ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - debugdata_coop, -#endif - parallel_samples, - ray_index); -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - - /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; - * These rays will be made active during next SceneIntersectkernel. 
- */ - enqueue_ray_index_local(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); + kernel_background_buffer_update(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index 18139687eab..1e3c4fa28c7 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -14,77 +14,49 @@ * limitations under the License. */ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_data_init.h" __kernel void kernel_ocl_path_trace_data_init( - ccl_global char *globals, - ccl_global char *sd_DL_shadow, + KernelGlobals *kg, ccl_constant KernelData *data, - ccl_global float *per_sample_output_buffers, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, ccl_global uint *rng_state, - ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ - ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ - ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ - PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ - ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ - ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ - Intersection *Intersection_coop_shadow, - ccl_global char *ray_state, /* Stores information on current state of a ray */ #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, #include "../../kernel_textures.h" - int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global int *Queue_data, /* Memory for queues */ + int 
start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, ccl_global int *Queue_index, /* Tracks the number of elements in queues */ int queuesize, /* size (capacity) of the queue */ ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ - ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ -#ifdef __WORK_STEALING__ ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ unsigned int num_samples, /* Total number of samples per pixel */ -#endif -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_global float *buffer) { - kernel_data_init((KernelGlobals *)globals, - (ShaderData *)sd_DL_shadow, + kernel_data_init(kg, data, - per_sample_output_buffers, - rng_state, - rng_coop, - throughput_coop, - L_transparent_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - Intersection_coop_shadow, + split_data_buffer, + num_elements, ray_state, + rng_state, #define KERNEL_TEX(type, ttype, name) name, #include "../../kernel_textures.h" - start_sample, sx, sy, sw, sh, offset, stride, - rng_state_offset_x, - rng_state_offset_y, - rng_state_stride, - Queue_data, + start_sample, + end_sample, + sx, sy, sw, sh, offset, stride, Queue_index, queuesize, use_queues_flag, - work_array, -#ifdef __WORK_STEALING__ work_pool_wgs, num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - debugdata_coop, -#endif - parallel_samples); + buffer); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index c6a2c8d050c..5d2f46b319d 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -14,74 +14,13 @@ * limitations under the License. 
*/ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_direct_lighting.h" __kernel void kernel_ocl_path_trace_direct_lighting( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Required for direct lighting */ - ccl_global uint *rng_coop, /* Required for direct lighting */ - ccl_global PathState *PathState_coop, /* Required for direct lighting */ - ccl_global int *ISLamp_coop, /* Required for direct lighting */ - ccl_global Ray *LightRay_coop, /* Required for direct lighting */ - ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize) /* Size (capacity) of each queue */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - char enqueue_flag = 0; - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. 
- */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - enqueue_flag = kernel_direct_lighting((KernelGlobals *)kg, - (ShaderData *)sd, - rng_coop, - PathState_coop, - ISLamp_coop, - LightRay_coop, - BSDFEval_coop, - ray_state, - ray_index); - -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - -#ifdef __EMISSION__ - /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_DL_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); -#endif + kernel_direct_lighting(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index e063614da1a..7724b8a0bdf 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -14,110 +14,13 @@ * limitations under the License. 
*/ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Required throughout the kernel except probabilistic path termination and AO */ - ccl_global float *per_sample_output_buffers, - ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ - ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ - ccl_global float *L_transparent_coop, /* Required for handling holdout material */ - PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ - ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ - Intersection *Intersection_coop, /* Required for indirect primitive emission */ - ccl_global float3 *AOAlpha_coop, /* Required for AO */ - ccl_global float3 *AOBSDF_coop, /* Required for AO */ - ccl_global Ray *AOLightRay_coop, /* Required for AO */ - int sw, int sh, int sx, int sy, int stride, - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ -#ifdef __WORK_STEALING__ - unsigned int start_sample, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - ccl_local unsigned int local_queue_atomics_bg; - ccl_local unsigned int local_queue_atomics_ao; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics_bg = 0; - local_queue_atomics_ao = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - char enqueue_flag = 0; - 
char enqueue_flag_AO_SHADOW_RAY_CAST = 0; - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif /* __COMPUTE_DEVICE_GPU__ */ - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - kernel_holdout_emission_blurring_pathtermination_ao( - (KernelGlobals *)kg, - (ShaderData *)sd, - per_sample_output_buffers, - rng_coop, - throughput_coop, - L_transparent_coop, - PathRadiance_coop, - PathState_coop, - Intersection_coop, - AOAlpha_coop, - AOBSDF_coop, - AOLightRay_coop, - sw, sh, sx, sy, stride, - ray_state, - work_array, -#ifdef __WORK_STEALING__ - start_sample, -#endif - parallel_samples, - ray_index, - &enqueue_flag, - &enqueue_flag_AO_SHADOW_RAY_CAST); -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - - /* Enqueue RAY_UPDATE_BUFFER rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics_bg, - Queue_data, - Queue_index); - -#ifdef __AO__ - /* Enqueue to-shadow-ray-cast rays. 
*/ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_AO_RAYS, - enqueue_flag_AO_SHADOW_RAY_CAST, - queuesize, - &local_queue_atomics_ao, - Queue_data, - Queue_index); -#endif + kernel_holdout_emission_blurring_pathtermination_ao(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 267bddc2ffc..2b84d0ea43e 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -14,67 +14,13 @@ * limitations under the License. */ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_lamp_emission.h" __kernel void kernel_ocl_path_trace_lamp_emission( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global float3 *throughput_coop, /* Required for lamp emission */ - PathRadiance *PathRadiance_coop, /* Required for lamp emission */ - ccl_global Ray *Ray_coop, /* Required for lamp emission */ - ccl_global PathState *PathState_coop, /* Required for lamp emission */ - Intersection *Intersection_coop, /* Required for lamp emission */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int sw, int sh, - ccl_global int *Queue_data, /* Memory for queues */ - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* Size (capacity) of queues */ - ccl_global char *use_queues_flag, /* Used to decide if this kernel should use - * queues to fetch ray index - */ - int parallel_samples) /* Number of samples to be processed in parallel */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - int x = get_global_id(0); - int y = get_global_id(1); - - /* We will empty this queue in this kernel. */ - if(get_global_id(0) == 0 && get_global_id(1) == 0) { - Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - } - /* Fetch use_queues_flag. 
*/ - ccl_local char local_use_queues_flag; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_use_queues_flag = use_queues_flag[0]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index; - if(local_use_queues_flag) { - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 1); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } else { - if(x < (sw * parallel_samples) && y < sh) { - ray_index = x + y * (sw * parallel_samples); - } else { - return; - } - } - - kernel_lamp_emission((KernelGlobals *)kg, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - sw, sh, - use_queues_flag, - ray_index); + kernel_lamp_emission(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 6d49b6294a8..e87e367fb9c 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -14,101 +14,13 @@ * limitations under the License. 
*/ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_next_iteration_setup.h" __kernel void kernel_ocl_path_trace_next_iteration_setup( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Required for setting up ray for next iteration */ - ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ - ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ - PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ - ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ - ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ - ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ - ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ - ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ - ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ - ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ - ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ - ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should - * use queues to fetch ray index */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if(get_global_id(0) == 0 && get_global_id(1) == 0) { - /* If we are here, then it means that scene-intersect kernel - * has already been executed atleast once. 
From the next time, - * scene-intersect kernel may operate on queues to fetch ray index - */ - use_queues_flag[0] = 1; - - /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and - * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the - * previous kernel. - */ - Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; - Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; - } - - char enqueue_flag = 0; - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - enqueue_flag = kernel_next_iteration_setup((KernelGlobals *)kg, - (ShaderData *)sd, - rng_coop, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - LightRay_dl_coop, - ISLamp_coop, - BSDFEval_coop, - LightRay_ao_coop, - AOBSDF_coop, - AOAlpha_coop, - ray_state, - use_queues_flag, - ray_index); -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - - /* Enqueue RAY_UPDATE_BUFFER rays. 
*/ - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); + kernel_next_iteration_setup(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl new file mode 100644 index 00000000000..7e9e4a02529 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_path_init.h" + +__kernel void kernel_ocl_path_trace_path_init( + KernelGlobals *kg, + ccl_constant KernelData *data) +{ + kernel_path_init(kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 3156dc255fb..9ceb6a5c3d8 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -14,93 +14,13 @@ * limitations under the License. 
*/ -#include "../../kernel_compat_opencl.h" -#include "../../kernel_math.h" -#include "../../kernel_types.h" -#include "../../kernel_globals.h" -#include "../../kernel_queues.h" +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_queue_enqueue.h" -/* - * The kernel "kernel_queue_enqueue" enqueues rays of - * different ray state into their appropriate Queues; - * 1. Rays that have been determined to hit the background from the - * "kernel_scene_intersect" kernel - * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; - * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS. - * - * The input and output of the kernel is as follows, - * - * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | - * queuesize -------------------------------------------| | - * - * Note on Queues : - * State of queues during the first time this kernel is called : - * At entry, - * Both QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. - * At exit, - * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays - * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_HIT_BACKGROUND rays. - * - * State of queue during other times this kernel is called : - * At entry, - * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be empty. - * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will contain RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays. - * At exit, - * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays. 
- * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. - */ __kernel void kernel_ocl_path_trace_queue_enqueue( - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int queuesize) /* Size (capacity) of each queue */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - /* We have only 2 cases (Hit/Not-Hit) */ - ccl_local unsigned int local_queue_atomics[2]; - - int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - - if(lidx < 2 ) { - local_queue_atomics[lidx] = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int queue_number = -1; - - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; - } - else if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; - } - - unsigned int my_lqidx; - if(queue_number != -1) { - my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); - } - barrier(CLK_LOCAL_MEM_FENCE); - - if(lidx == 0) { - local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = - get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, - local_queue_atomics, - Queue_index); - local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = - get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - local_queue_atomics, - Queue_index); - } - barrier(CLK_LOCAL_MEM_FENCE); - - unsigned int my_gqidx; - if(queue_number != -1) { - my_gqidx = get_global_queue_index(queue_number, - queuesize, - my_lqidx, - local_queue_atomics); - Queue_data[my_gqidx] = ray_index; - } + kernel_queue_enqueue(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index 
7f3f433c7a6..4e083e87d1c 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -14,67 +14,13 @@ * limitations under the License. */ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_scene_intersect.h" __kernel void kernel_ocl_path_trace_scene_intersect( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global uint *rng_coop, - ccl_global Ray *Ray_coop, /* Required for scene_intersect */ - ccl_global PathState *PathState_coop, /* Required for scene_intersect */ - Intersection *Intersection_coop, /* Required for scene_intersect */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int sw, int sh, - ccl_global int *Queue_data, /* Memory for queues */ - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* Size (capacity) of queues */ - ccl_global char *use_queues_flag, /* used to decide if this kernel should use - * queues to fetch ray index */ -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - int x = get_global_id(0); - int y = get_global_id(1); - - /* Fetch use_queues_flag */ - ccl_local char local_use_queues_flag; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_use_queues_flag = use_queues_flag[0]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index; - if(local_use_queues_flag) { - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } else { - if(x < (sw * parallel_samples) && y < sh) { - ray_index = x + y * (sw * parallel_samples); - } else { - return; - } - } - - kernel_scene_intersect((KernelGlobals *)kg, - 
rng_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - sw, sh, - use_queues_flag, -#ifdef __KERNEL_DEBUG__ - debugdata_coop, -#endif - ray_index); + kernel_scene_intersect(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index c37856c8f30..a2b48b15928 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -14,55 +14,13 @@ * limitations under the License. */ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_shader_eval.h" __kernel void kernel_ocl_path_trace_shader_eval( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Output ShaderData structure to be filled */ - ccl_global uint *rng_coop, /* Required for rbsdf calculation */ - ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ - ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ - Intersection *Intersection_coop, /* Required for setting up shader from ray */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global int *Queue_data, /* queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize) /* Size (capacity) of each queue */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. 
*/ - ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - - char enqueue_flag = (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); - - /* Continue on with shader evaluation. */ - kernel_shader_eval((KernelGlobals *)kg, - (ShaderData *)sd, - rng_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - ray_index); + kernel_shader_eval(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl index edf76fba714..3693f7f9c9d 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl @@ -14,52 +14,13 @@ * limitations under the License. 
*/ +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" #include "split/kernel_shadow_blocked.h" __kernel void kernel_ocl_path_trace_shadow_blocked( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global PathState *PathState_coop, /* Required for shadow blocked */ - ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ - ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ - ccl_global char *ray_state, - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize) /* Size (capacity) of each queue */ + KernelGlobals *kg, + ccl_constant KernelData *data) { - int lidx = get_local_id(1) * get_local_id(0) + get_local_id(0); - - ccl_local unsigned int ao_queue_length; - ccl_local unsigned int dl_queue_length; - if(lidx == 0) { - ao_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - dl_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - /* flag determining if the current ray is to process shadow ray for AO or DL */ - char shadow_blocked_type = -1; - - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - if(thread_index < ao_queue_length + dl_queue_length) { - if(thread_index < ao_queue_length) { - ray_index = get_ray_index(thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, Queue_data, queuesize, 1); - shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO; - } else { - ray_index = get_ray_index(thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, Queue_data, queuesize, 1); - shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL; - } - } - - if(ray_index == QUEUE_EMPTY_SLOT) - return; - - kernel_shadow_blocked((KernelGlobals *)kg, - PathState_coop, - LightRay_dl_coop, - LightRay_ao_coop, - ray_state, - shadow_blocked_type, - ray_index); + kernel_shadow_blocked(kg); } diff --git 
a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl new file mode 100644 index 00000000000..0a1843ff8bd --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" + +__kernel void kernel_ocl_path_trace_state_buffer_size( + KernelGlobals *kg, + ccl_constant KernelData *data, + uint num_threads, + ccl_global uint *size) +{ + kg->data = data; + *size = split_data_buffer_size(kg, num_threads); +} + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl deleted file mode 100644 index 88a1ed830af..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "split/kernel_sum_all_radiance.h" - -__kernel void kernel_ocl_path_trace_sum_all_radiance( - ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ - ccl_global float *buffer, /* Output buffer of RenderTile */ - ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ - int parallel_samples, int sw, int sh, int stride, - int buffer_offset_x, - int buffer_offset_y, - int buffer_stride, - int start_sample) -{ - kernel_sum_all_radiance(data, - buffer, - per_sample_output_buffer, - parallel_samples, - sw, sh, stride, - buffer_offset_x, - buffer_offset_y, - buffer_stride, - start_sample); -} diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 3614717e28c..d3a69d39597 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -78,7 +78,7 @@ public: bssrdf->albedo = albedo.x; bssrdf->sharpness = sharpness; bssrdf->N = params.N; - ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + sd->flag |= bssrdf_setup(bssrdf, (ClosureType)type); } bssrdf = bssrdf_alloc(sd, make_float3(0.0f, weight.y, 0.0f)); @@ -89,7 +89,7 @@ public: bssrdf->albedo = albedo.y; bssrdf->sharpness = sharpness; bssrdf->N = params.N; - ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + sd->flag |= bssrdf_setup(bssrdf, (ClosureType)type); } bssrdf = bssrdf_alloc(sd, make_float3(0.0f, 0.0f, weight.z)); @@ -100,7 +100,7 @@ public: bssrdf->albedo = albedo.z; bssrdf->sharpness = sharpness; bssrdf->N = 
params.N; - ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + sd->flag |= bssrdf_setup(bssrdf, (ClosureType)type); } } } diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 94de782dca0..fe61587d179 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -42,6 +42,7 @@ #include "kernel_types.h" #include "kernel_compat_cpu.h" +#include "split/kernel_split_data_types.h" #include "kernel_globals.h" #include "kernel_montecarlo.h" #include "kernel_random.h" diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 58bbdc33920..b08353e82d1 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -39,6 +39,7 @@ #include "util_string.h" #include "kernel_compat_cpu.h" +#include "split/kernel_split_data_types.h" #include "kernel_globals.h" #include "kernel_random.h" #include "kernel_projection.h" diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 0d762bbdb38..c7e9f57b18a 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -19,6 +19,7 @@ #include "kernel_compat_cpu.h" #include "kernel_montecarlo.h" #include "kernel_types.h" +#include "split/kernel_split_data_types.h" #include "kernel_globals.h" #include "geom/geom_object.h" diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h index 9bfa71c75ef..04aaf1bbaad 100644 --- a/intern/cycles/kernel/split/kernel_background_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_background_buffer_update kernel. 
* This is the fourth kernel in the ray tracing logic, and the third @@ -69,80 +69,77 @@ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty */ -ccl_device char kernel_background_buffer_update( - KernelGlobals *kg, - ccl_global float *per_sample_output_buffers, - ccl_global uint *rng_state, - ccl_global uint *rng_coop, /* Required for buffer Update */ - ccl_global float3 *throughput_coop, /* Required for background hit processing */ - PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ - ccl_global Ray *Ray_coop, /* Required for background hit processing */ - ccl_global PathState *PathState_coop, /* Required for background hit processing */ - ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ - ccl_global char *ray_state, /* Stores information on the current state of a ray */ - int sw, int sh, int sx, int sy, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global unsigned int *work_array, /* Denotes work of each ray */ - int end_sample, - int start_sample, -#ifdef __WORK_STEALING__ - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples, /* Number of samples to be processed in parallel */ - int ray_index) +ccl_device void kernel_background_buffer_update(KernelGlobals *kg) { + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(ray_index == 0) { + /* We will empty this queue in this kernel. 
*/ + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } char enqueue_flag = 0; + ray_index = get_ray_index(kg, ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + ccl_global uint *rng_state = kernel_split_params.rng_state; + int stride = kernel_split_params.stride; + + ccl_global char *ray_state = kernel_split_state.ray_state; #ifdef __KERNEL_DEBUG__ - DebugData *debug_data = &debugdata_coop[ray_index]; + DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; #endif - ccl_global PathState *state = &PathState_coop[ray_index]; - PathRadiance *L = L = &PathRadiance_coop[ray_index]; - ccl_global Ray *ray = &Ray_coop[ray_index]; - ccl_global float3 *throughput = &throughput_coop[ray_index]; - ccl_global float *L_transparent = &L_transparent_coop[ray_index]; - ccl_global uint *rng = &rng_coop[ray_index]; - -#ifdef __WORK_STEALING__ - unsigned int my_work; - ccl_global float *initial_per_sample_output_buffers; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; + ccl_global uint *rng = &kernel_split_state.rng[ray_index]; + ccl_global float *buffer = kernel_split_params.buffer; + + unsigned 
int work_index; ccl_global uint *initial_rng; -#endif + unsigned int sample; unsigned int tile_x; unsigned int tile_y; unsigned int pixel_x; unsigned int pixel_y; - unsigned int my_sample_tile; -#ifdef __WORK_STEALING__ - my_work = work_array[ray_index]; - sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; - get_pixel_tile_position(&pixel_x, &pixel_y, + work_index = kernel_split_state.work_array[ray_index]; + sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, - my_work, - sw, sh, sx, sy, - parallel_samples, + work_index, ray_index); - my_sample_tile = 0; - initial_per_sample_output_buffers = per_sample_output_buffers; initial_rng = rng_state; -#else /* __WORK_STEALING__ */ - sample = work_array[ray_index]; - int tile_index = ray_index / parallel_samples; - /* buffer and rng_state's stride is "stride". Find x and y using ray_index */ - tile_x = tile_index % sw; - tile_y = tile_index / sw; - my_sample_tile = ray_index - (tile_index * parallel_samples); -#endif /* __WORK_STEALING__ */ - - rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride; - per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; + + rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; + buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { /* eval background shader if nothing hit */ @@ -157,7 +154,7 @@ ccl_device char kernel_background_buffer_update( if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, kg->sd_input, state, ray); + float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, 
ray); path_radiance_accum_background(L, (*throughput), L_background, state->bounce); #endif ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); @@ -166,55 +163,38 @@ ccl_device char kernel_background_buffer_update( if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { float3 L_sum = path_radiance_clamp_and_sum(kg, L); - kernel_write_light_passes(kg, per_sample_output_buffers, L, sample); + kernel_write_light_passes(kg, buffer, L, sample); #ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample); + kernel_write_debug_passes(kg, buffer, state, debug_data, sample); #endif float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent)); /* accumulate result in output buffer */ - kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad); + kernel_write_pass_float4(buffer, sample, L_rad); path_rng_end(kg, rng_state, *rng); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { -#ifdef __WORK_STEALING__ /* We have completed current work; So get next work */ - int valid_work = get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index); + int valid_work = get_next_work(kg, &work_index, ray_index); if(!valid_work) { /* If work is invalid, this means no more work is available and the thread may exit */ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); } -#else /* __WORK_STEALING__ */ - if((sample + parallel_samples) >= end_sample) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); - } -#endif /* __WORK_STEALING__ */ if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { -#ifdef __WORK_STEALING__ - work_array[ray_index] = my_work; + kernel_split_state.work_array[ray_index] = work_index; /* Get the sample associated with the current work */ - sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; + sample = get_work_sample(kg, work_index, ray_index) + 
kernel_split_params.start_sample; /* Get pixel and tile position associated with current work */ - get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y, my_work, sw, sh, sx, sy, parallel_samples, ray_index); - my_sample_tile = 0; + get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, ray_index); /* Remap rng_state according to the current work */ - rng_state = initial_rng + ((rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride); - /* Remap per_sample_output_buffers according to the current work */ - per_sample_output_buffers = initial_per_sample_output_buffers - + (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * kernel_data.film.pass_stride; -#else /* __WORK_STEALING__ */ - work_array[ray_index] = sample + parallel_samples; - sample = work_array[ray_index]; - - /* Get ray position from ray index */ - pixel_x = sx + ((ray_index / parallel_samples) % sw); - pixel_y = sy + ((ray_index / parallel_samples) / sw); -#endif /* __WORK_STEALING__ */ + rng_state = initial_rng + kernel_split_params.offset + pixel_x + pixel_y*stride; + /* Remap buffer according to the current work */ + buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; /* Initialize random numbers and ray. */ kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray); @@ -226,7 +206,7 @@ ccl_device char kernel_background_buffer_update( *throughput = make_float3(1.0f, 1.0f, 1.0f); *L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); - path_state_init(kg, kg->sd_input, state, rng, sample, ray); + path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray); #ifdef __KERNEL_DEBUG__ debug_data_init(debug_data); #endif @@ -237,12 +217,29 @@ ccl_device char kernel_background_buffer_update( /* These rays do not participate in path-iteration. 
*/ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Accumulate result in output buffer. */ - kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad); + kernel_write_pass_float4(buffer, sample, L_rad); path_rng_end(kg, rng_state, *rng); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } } } - return enqueue_flag; + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; + * These rays will be made active during next SceneIntersectkernel. + */ + enqueue_ray_index_local(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 6e158d53d23..9b62d65ffd9 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -14,108 +14,105 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_data_initialization kernel * This kernel Initializes structures needed in path-iteration kernels. - * This is the first kernel in ray-tracing logic. 
- * - * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE - * - * Its input and output are as follows, - * - * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng - * Un-initialized throughput -------| |--- Initialized throughput - * Un-initialized L_transparent ----| |--- Initialized L_transparent - * Un-initialized PathRadiance -----| |--- Initialized PathRadiance - * Un-initialized Ray --------------| |--- Initialized Ray - * Un-initialized PathState --------| |--- Initialized PathState - * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT) - * Un-initialized QueueIndex -------| |--- Initialized QueueIndex (to 0) - * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false) - * Un-initialized ray_state --------| |--- Initialized ray_state - * parallel_samples --------------- | |--- Initialized per_sample_output_buffers - * rng_state -----------------------| |--- Initialized work_array - * data ----------------------------| |--- Initialized work_pool_wgs - * start_sample --------------------| | - * sx ------------------------------| | - * sy ------------------------------| | - * sw ------------------------------| | - * sh ------------------------------| | - * stride --------------------------| | - * queuesize -----------------------| | - * num_samples ---------------------| | * * Note on Queues : * All slots in queues are initialized to queue empty slot; * The number of elements in the queues is initialized to 0; */ + +/* distributes an amount of work across all threads + * note: work done inside the loop may not show up to all threads till after the current kernel has completed + */ +#define parallel_for(kg, iter_name, work_size) \ + for(size_t _size = (work_size), \ + _global_size = ccl_global_size(0) * ccl_global_size(1), \ + _n = _size / _global_size, \ + _thread = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0), \ + iter_name = (_n > 0) ? 
(_thread * _n) : (_thread) \ + ; \ + (iter_name < (_thread+1) * _n) || (iter_name == _n * _global_size + _thread && _thread < _size % _global_size) \ + ; \ + iter_name = (iter_name != (_thread+1) * _n - 1) ? (iter_name + 1) : (_n * _global_size + _thread) \ + ) + +#ifndef __KERNEL_CPU__ ccl_device void kernel_data_init( +#else +void KERNEL_FUNCTION_FULL_NAME(data_init)( +#endif KernelGlobals *kg, - ShaderData *sd_DL_shadow, ccl_constant KernelData *data, - ccl_global float *per_sample_output_buffers, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, ccl_global uint *rng_state, - ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ - ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ - ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ - PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ - ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ - ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ - Intersection *Intersection_coop_shadow, - ccl_global char *ray_state, /* Stores information on current state of a ray */ +#ifdef __KERNEL_OPENCL__ #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, #include "../kernel_textures.h" +#endif - int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global int *Queue_data, /* Memory for queues */ + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, ccl_global int *Queue_index, /* Tracks the number of elements in queues */ int queuesize, /* size (capacity) of the queue */ ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ - 
ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ -#ifdef __WORK_STEALING__ - ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ - unsigned int num_samples, /* Total number of samples per pixel */ -#endif -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_global unsigned int *work_pools, /* Work pool for each work group */ + unsigned int num_samples, + ccl_global float *buffer) { +#ifdef __KERNEL_OPENCL__ kg->data = data; - kg->sd_input = sd_DL_shadow; - kg->isect_shadow = Intersection_coop_shadow; +#endif + + kernel_split_params.x = sx; + kernel_split_params.y = sy; + kernel_split_params.w = sw; + kernel_split_params.h = sh; + + kernel_split_params.offset = offset; + kernel_split_params.stride = stride; + + kernel_split_params.rng_state = rng_state; + + kernel_split_params.start_sample = start_sample; + kernel_split_params.end_sample = end_sample; + + kernel_split_params.work_pools = work_pools; + kernel_split_params.num_samples = num_samples; + + kernel_split_params.queue_index = Queue_index; + kernel_split_params.queue_size = queuesize; + kernel_split_params.use_queues_flag = use_queues_flag; + + kernel_split_params.buffer = buffer; + + split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state); + +#ifdef __KERNEL_OPENCL__ #define KERNEL_TEX(type, ttype, name) \ kg->name = name; #include "../kernel_textures.h" +#endif - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - -#ifdef __WORK_STEALING__ - int lid = get_local_id(1) * get_local_size(0) + get_local_id(0); - /* Initialize work_pool_wgs */ - if(lid == 0) { - int group_index = get_group_id(1) * get_num_groups(0) + get_group_id(0); - work_pool_wgs[group_index] = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif /* __WORK_STEALING__ */ + int thread_index = ccl_global_id(1) * ccl_global_size(0) + 
ccl_global_id(0); /* Initialize queue data and queue index. */ if(thread_index < queuesize) { /* Initialize active ray queue. */ - Queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + kernel_split_state.queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; /* Initialize background and buffer update queue. */ - Queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + kernel_split_state.queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; /* Initialize shadow ray cast of AO queue. */ - Queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; /* Initialize shadow ray cast of direct lighting queue. */ - Queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT; } if(thread_index == 0) { @@ -126,109 +123,32 @@ ccl_device void kernel_data_init( /* The scene-intersect kernel should not use the queues very first time. * since the queue would be empty. */ - use_queues_flag[0] = 0; + *use_queues_flag = 0; } - int x = get_global_id(0); - int y = get_global_id(1); + /* zero the tiles pixels and initialize rng_state if this is the first sample */ + if(start_sample == 0) { + parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) { + int pixel = i / kernel_data.film.pass_stride; + int pass = i % kernel_data.film.pass_stride; - if(x < (sw * parallel_samples) && y < sh) { - int ray_index = x + y * (sw * parallel_samples); + int x = sx + pixel % sw; + int y = sy + pixel / sw; - /* This is the first assignment to ray_state; - * So we dont use ASSIGN_RAY_STATE macro. 
- */ - ray_state[ray_index] = RAY_ACTIVE; - - unsigned int my_sample; - unsigned int pixel_x; - unsigned int pixel_y; - unsigned int tile_x; - unsigned int tile_y; - unsigned int my_sample_tile; - -#ifdef __WORK_STEALING__ - unsigned int my_work = 0; - /* Get work. */ - get_next_work(work_pool_wgs, &my_work, sw, sh, num_samples, parallel_samples, ray_index); - /* Get the sample associated with the work. */ - my_sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; - - my_sample_tile = 0; - - /* Get pixel and tile position associated with the work. */ - get_pixel_tile_position(&pixel_x, &pixel_y, - &tile_x, &tile_y, - my_work, - sw, sh, sx, sy, - parallel_samples, - ray_index); - work_array[ray_index] = my_work; -#else /* __WORK_STEALING__ */ - unsigned int tile_index = ray_index / parallel_samples; - tile_x = tile_index % sw; - tile_y = tile_index / sw; - my_sample_tile = ray_index - (tile_index * parallel_samples); - my_sample = my_sample_tile + start_sample; - - /* Initialize work array. */ - work_array[ray_index] = my_sample ; - - /* Calculate pixel position of this ray. */ - pixel_x = sx + tile_x; - pixel_y = sy + tile_y; -#endif /* __WORK_STEALING__ */ - - rng_state += (rng_state_offset_x + tile_x) + (rng_state_offset_y + tile_y) * rng_state_stride; - - /* Initialise per_sample_output_buffers to all zeros. */ - per_sample_output_buffers += (((tile_x + (tile_y * stride)) * parallel_samples) + (my_sample_tile)) * kernel_data.film.pass_stride; - int per_sample_output_buffers_iterator = 0; - for(per_sample_output_buffers_iterator = 0; - per_sample_output_buffers_iterator < kernel_data.film.pass_stride; - per_sample_output_buffers_iterator++) - { - per_sample_output_buffers[per_sample_output_buffers_iterator] = 0.0f; - } + int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass; - /* Initialize random numbers and ray. 
*/ - kernel_path_trace_setup(kg, - rng_state, - my_sample, - pixel_x, pixel_y, - &rng_coop[ray_index], - &Ray_coop[ray_index]); - - if(Ray_coop[ray_index].t != 0.0f) { - /* Initialize throughput, L_transparent, Ray, PathState; - * These rays proceed with path-iteration. - */ - throughput_coop[ray_index] = make_float3(1.0f, 1.0f, 1.0f); - L_transparent_coop[ray_index] = 0.0f; - path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass); - path_state_init(kg, - kg->sd_input, - &PathState_coop[ray_index], - &rng_coop[ray_index], - my_sample, - &Ray_coop[ray_index]); -#ifdef __KERNEL_DEBUG__ - debug_data_init(&debugdata_coop[ray_index]); -#endif - } - else { - /* These rays do not participate in path-iteration. */ - float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - /* Accumulate result in output buffer. */ - kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad); - path_rng_end(kg, rng_state, rng_coop[ray_index]); - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); + *(buffer + index) = 0.0f; } - } - /* Mark rest of the ray-state indices as RAY_INACTIVE. */ - if(thread_index < (get_global_size(0) * get_global_size(1)) - (sh * (sw * parallel_samples))) { - /* First assignment, hence we dont use ASSIGN_RAY_STATE macro */ - ray_state[((sw * parallel_samples) * sh) + thread_index] = RAY_INACTIVE; + parallel_for(kg, i, sw * sh) { + int x = sx + i % sw; + int y = sy + i / sw; + + int index = (offset + x + y*stride); + *(rng_state + index) = hash_int_2d(x, y); + } } } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index 82ca18829d3..5163b8edc04 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_direct_lighting kernel. 
* This is the eighth kernel in the ray tracing logic. This is the seventh @@ -47,28 +47,50 @@ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queue will be filled with rays for which a shadow_blocked function must be executed, after this * kernel call. Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty. */ -ccl_device char kernel_direct_lighting( - KernelGlobals *kg, - ShaderData *sd, /* Required for direct lighting */ - ccl_global uint *rng_coop, /* Required for direct lighting */ - ccl_global PathState *PathState_coop, /* Required for direct lighting */ - ccl_global int *ISLamp_coop, /* Required for direct lighting */ - ccl_global Ray *LightRay_coop, /* Required for direct lighting */ - ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int ray_index) +ccl_device void kernel_direct_lighting(KernelGlobals *kg) { + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + char enqueue_flag = 0; - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &PathState_coop[ray_index]; + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. 
+ */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ShaderData *sd = &kernel_split_state.sd[ray_index]; /* direct lighting */ #ifdef __EMISSION__ if((kernel_data.integrator.use_direct_light && - (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL))) + (sd->flag & SD_BSDF_HAS_EVAL))) { /* Sample illumination from lights to find path contribution. */ - ccl_global RNG* rng = &rng_coop[ray_index]; + ccl_global RNG* rng = &kernel_split_state.rng[ray_index]; float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT); float light_u, light_v; path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); @@ -77,32 +99,48 @@ ccl_device char kernel_direct_lighting( LightSample ls; if(light_sample(kg, light_t, light_u, light_v, - ccl_fetch(sd, time), - ccl_fetch(sd, P), + sd->time, + sd->P, state->bounce, &ls)) { Ray light_ray; #ifdef __OBJECT_MOTION__ - light_ray.time = ccl_fetch(sd, time); + light_ray.time = sd->time; #endif BsdfEval L_light; bool is_lamp; - if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + if(direct_emission(kg, sd, &kernel_split_state.sd_DL_shadow[ray_index], &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { /* Write intermediate data to global memory to access from * the next kernel. */ - LightRay_coop[ray_index] = light_ray; - BSDFEval_coop[ray_index] = L_light; - ISLamp_coop[ray_index] = is_lamp; + kernel_split_state.light_ray[ray_index] = light_ray; + kernel_split_state.bsdf_eval[ray_index] = L_light; + kernel_split_state.is_lamp[ray_index] = is_lamp; /* Mark ray state for next shadow kernel. 
*/ - ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); enqueue_flag = 1; } } } #endif /* __EMISSION__ */ } - return enqueue_flag; + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + +#ifdef __EMISSION__ + /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +#endif } + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 5d951b972ed..7168efa59ae 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_holdout_emission_blurring_pathtermination_ao kernel. * This is the sixth kernel in the ray tracing logic. 
This is the fifth @@ -70,101 +70,105 @@ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO */ -ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( - KernelGlobals *kg, - ShaderData *sd, /* Required throughout the kernel except probabilistic path termination and AO */ - ccl_global float *per_sample_output_buffers, - ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ - ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ - ccl_global float *L_transparent_coop, /* Required for handling holdout material */ - PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ - ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ - Intersection *Intersection_coop, /* Required for indirect primitive emission */ - ccl_global float3 *AOAlpha_coop, /* Required for AO */ - ccl_global float3 *AOBSDF_coop, /* Required for AO */ - ccl_global Ray *AOLightRay_coop, /* Required for AO */ - int sw, int sh, int sx, int sy, int stride, - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ -#ifdef __WORK_STEALING__ - unsigned int start_sample, -#endif - int parallel_samples, /* Number of samples to be processed in parallel */ - int ray_index, - char *enqueue_flag, - char *enqueue_flag_AO_SHADOW_RAY_CAST) +ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg) { -#ifdef __WORK_STEALING__ - unsigned int my_work; + ccl_local unsigned int local_queue_atomics_bg; + ccl_local unsigned int local_queue_atomics_ao; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics_bg = 0; + local_queue_atomics_ao = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + 
+ char enqueue_flag = 0; + char enqueue_flag_AO_SHADOW_RAY_CAST = 0; + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif /* __COMPUTE_DEVICE_GPU__ */ + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + int stride = kernel_split_params.stride; + + unsigned int work_index; unsigned int pixel_x; unsigned int pixel_y; -#endif + unsigned int tile_x; unsigned int tile_y; - int my_sample_tile; unsigned int sample; ccl_global RNG *rng = 0x0; ccl_global PathState *state = 0x0; float3 throughput; + ccl_global char *ray_state = kernel_split_state.ray_state; + ShaderData *sd = &kernel_split_state.sd[ray_index]; + ccl_global float *buffer = kernel_split_params.buffer; + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - throughput = throughput_coop[ray_index]; - state = &PathState_coop[ray_index]; - rng = &rng_coop[ray_index]; -#ifdef __WORK_STEALING__ - my_work = work_array[ray_index]; - sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + start_sample; - get_pixel_tile_position(&pixel_x, &pixel_y, + throughput = kernel_split_state.throughput[ray_index]; + state = &kernel_split_state.path_state[ray_index]; + rng = &kernel_split_state.rng[ray_index]; + + work_index = kernel_split_state.work_array[ray_index]; + sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, - 
my_work, - sw, sh, sx, sy, - parallel_samples, + work_index, ray_index); - my_sample_tile = 0; -#else /* __WORK_STEALING__ */ - sample = work_array[ray_index]; - /* Buffer's stride is "stride"; Find x and y using ray_index. */ - int tile_index = ray_index / parallel_samples; - tile_x = tile_index % sw; - tile_y = tile_index / sw; - my_sample_tile = ray_index - (tile_index * parallel_samples); -#endif /* __WORK_STEALING__ */ - per_sample_output_buffers += - (((tile_x + (tile_y * stride)) * parallel_samples) + my_sample_tile) * - kernel_data.film.pass_stride; + + buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride; /* holdout */ #ifdef __HOLDOUT__ - if(((ccl_fetch(sd, flag) & SD_HOLDOUT) || - (ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK)) && + if(((sd->flag & SD_HOLDOUT) || + (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && (state->flag & PATH_RAY_CAMERA)) { if(kernel_data.background.transparent) { float3 holdout_weight; - if(ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK) { + if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { holdout_weight = make_float3(1.0f, 1.0f, 1.0f); } else { holdout_weight = shader_holdout_eval(kg, sd); } /* any throughput is ok, should all be identical here */ - L_transparent_coop[ray_index] += average(holdout_weight*throughput); + kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput); } - if(ccl_fetch(sd, object_flag) & SD_OBJECT_HOLDOUT_MASK) { + if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - *enqueue_flag = 1; + enqueue_flag = 1; } } #endif /* __HOLDOUT__ */ } if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - PathRadiance *L = &PathRadiance_coop[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; /* Holdout mask objects do not write data passes. 
*/ kernel_write_data_passes(kg, - per_sample_output_buffers, + buffer, L, sd, sample, @@ -183,12 +187,12 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( #ifdef __EMISSION__ /* emission */ - if(ccl_fetch(sd, flag) & SD_EMISSION) { + if(sd->flag & SD_EMISSION) { /* TODO(sergey): is isect.t wrong here for transparent surfaces? */ float3 emission = indirect_primitive_emission( kg, sd, - Intersection_coop[ray_index].t, + kernel_split_state.isect[ray_index].t, state->flag, state->ray_pdf); path_radiance_accum_emission(L, throughput, emission, state->bounce); @@ -203,7 +207,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( if(probability == 0.0f) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - *enqueue_flag = 1; + enqueue_flag = 1; } if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { @@ -211,10 +215,10 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE); if(terminate >= probability) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - *enqueue_flag = 1; + enqueue_flag = 1; } else { - throughput_coop[ray_index] = throughput/probability; + kernel_split_state.throughput[ray_index] = throughput/probability; } } } @@ -224,7 +228,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || - (ccl_fetch(sd, flag) & SD_AO)) + (sd->flag & SD_AO)) { /* todo: solve correlation */ float bsdf_u, bsdf_v; @@ -232,29 +236,56 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( float ao_factor = kernel_data.background.ao_factor; float3 ao_N; - AOBSDF_coop[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); - AOAlpha_coop[ray_index] = shader_bsdf_alpha(kg, sd); + kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + 
kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd); float3 ao_D; float ao_pdf; sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) { + if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { Ray _ray; - _ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng)); + _ray.P = ray_offset(sd->P, sd->Ng); _ray.D = ao_D; _ray.t = kernel_data.background.ao_distance; #ifdef __OBJECT_MOTION__ - _ray.time = ccl_fetch(sd, time); + _ray.time = sd->time; #endif - _ray.dP = ccl_fetch(sd, dP); + _ray.dP = sd->dP; _ray.dD = differential3_zero(); - AOLightRay_coop[ray_index] = _ray; + kernel_split_state.ao_light_ray[ray_index] = _ray; ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO); - *enqueue_flag_AO_SHADOW_RAY_CAST = 1; + enqueue_flag_AO_SHADOW_RAY_CAST = 1; } } } #endif /* __AO__ */ + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics_bg, + kernel_split_state.queue_data, + kernel_split_params.queue_index); + +#ifdef __AO__ + /* Enqueue to-shadow-ray-cast rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + enqueue_flag_AO_SHADOW_RAY_CAST, + kernel_split_params.queue_size, + &local_queue_atomics_ao, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +#endif } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index 3bd0e361078..261625da31d 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_lamp_emission * This is the 3rd kernel in the ray-tracing logic. 
This is the second of the @@ -36,28 +36,39 @@ * sw -------------------------------------------------| | * sh -------------------------------------------------| | */ -ccl_device void kernel_lamp_emission( - KernelGlobals *kg, - ccl_global float3 *throughput_coop, /* Required for lamp emission */ - PathRadiance *PathRadiance_coop, /* Required for lamp emission */ - ccl_global Ray *Ray_coop, /* Required for lamp emission */ - ccl_global PathState *PathState_coop, /* Required for lamp emission */ - Intersection *Intersection_coop, /* Required for lamp emission */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int sw, int sh, - ccl_global char *use_queues_flag, /* Used to decide if this kernel should use - * queues to fetch ray index - */ - int ray_index) +ccl_device void kernel_lamp_emission(KernelGlobals *kg) { - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || - IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) + /* We will empty this queue in this kernel. */ + if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + } + /* Fetch use_queues_flag. 
*/ + ccl_local char local_use_queues_flag; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_use_queues_flag = *kernel_split_params.use_queues_flag; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(local_use_queues_flag) { + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } + + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { - PathRadiance *L = &PathRadiance_coop[ray_index]; - ccl_global PathState *state = &PathState_coop[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = throughput_coop[ray_index]; - Ray ray = Ray_coop[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; #ifdef __LAMP_MIS__ if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) { @@ -65,7 +76,7 @@ ccl_device void kernel_lamp_emission( Ray light_ray; light_ray.P = ray.P - state->ray_t*ray.D; - state->ray_t += Intersection_coop[ray_index].t; + state->ray_t += kernel_split_state.isect[ray_index].t; light_ray.D = ray.D; light_ray.t = state->ray_t; light_ray.time = ray.time; @@ -74,10 +85,13 @@ ccl_device void kernel_lamp_emission( /* intersect with lamp */ float3 emission; - if(indirect_lamp_emission(kg, kg->sd_input, state, &light_ray, &emission)) { + if(indirect_lamp_emission(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &light_ray, &emission)) { path_radiance_accum_emission(L, throughput, emission, state->bounce); } } #endif /* __LAMP_MIS__ */ } } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h 
b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index 816f3a6fbff..a6f26278116 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_setup_next_iteration kernel. * This is the tenth kernel in the ray tracing logic. This is the ninth @@ -59,47 +59,76 @@ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE, RAY_REGENERATED and more RAY_UPDATE_BUFFER rays. * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays */ -ccl_device char kernel_next_iteration_setup( - KernelGlobals *kg, - ShaderData *sd, /* Required for setting up ray for next iteration */ - ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ - ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ - PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ - ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ - ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ - ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ - ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ - ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ - ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ - ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ - ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global char *use_queues_flag, /* flag to decide if scene_intersect kernel should - * use queues to fetch ray index */ - int ray_index) +ccl_device void 
kernel_next_iteration_setup(KernelGlobals *kg) { + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + /* If we are here, then it means that scene-intersect kernel + * has already been executed atleast once. From the next time, + * scene-intersect kernel may operate on queues to fetch ray index + */ + *kernel_split_params.use_queues_flag = 1; + + /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and + * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the + * previous kernel. + */ + kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } + char enqueue_flag = 0; + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif /* Load ShaderData structure. */ PathRadiance *L = NULL; ccl_global PathState *state = NULL; + ccl_global char *ray_state = kernel_split_state.ray_state; /* Path radiance update for AO/Direct_lighting's shadow blocked. 
*/ if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) { - state = &PathState_coop[ray_index]; - L = &PathRadiance_coop[ray_index]; - float3 _throughput = throughput_coop[ray_index]; + state = &kernel_split_state.path_state[ray_index]; + L = &kernel_split_state.path_radiance[ray_index]; + float3 _throughput = kernel_split_state.throughput[ray_index]; if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) { - float3 shadow = LightRay_ao_coop[ray_index].P; - char update_path_radiance = LightRay_ao_coop[ray_index].t; + float3 shadow = kernel_split_state.ao_light_ray[ray_index].P; + // TODO(mai): investigate correctness here + char update_path_radiance = (char)kernel_split_state.ao_light_ray[ray_index].t; if(update_path_radiance) { path_radiance_accum_ao(L, _throughput, - AOAlpha_coop[ray_index], - AOBSDF_coop[ray_index], + kernel_split_state.ao_alpha[ray_index], + kernel_split_state.ao_bsdf[ray_index], shadow, state->bounce); } @@ -107,35 +136,50 @@ ccl_device char kernel_next_iteration_setup( } if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) { - float3 shadow = LightRay_dl_coop[ray_index].P; - char update_path_radiance = LightRay_dl_coop[ray_index].t; + float3 shadow = kernel_split_state.light_ray[ray_index].P; + // TODO(mai): investigate correctness here + char update_path_radiance = (char)kernel_split_state.light_ray[ray_index].t; if(update_path_radiance) { - BsdfEval L_light = BSDFEval_coop[ray_index]; + BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; path_radiance_accum_light(L, _throughput, &L_light, shadow, 1.0f, state->bounce, - ISLamp_coop[ray_index]); + kernel_split_state.is_lamp[ray_index]); } REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL); } } if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global float3 *throughput = &throughput_coop[ray_index]; - ccl_global Ray *ray = &Ray_coop[ray_index]; - ccl_global RNG *rng = &rng_coop[ray_index]; - state = 
&PathState_coop[ray_index]; - L = &PathRadiance_coop[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global RNG *rng = &kernel_split_state.rng[ray_index]; + state = &kernel_split_state.path_state[ray_index]; + L = &kernel_split_state.path_radiance[ray_index]; /* Compute direct lighting and next bounce. */ - if(!kernel_path_surface_bounce(kg, rng, sd, throughput, state, L, ray)) { + if(!kernel_path_surface_bounce(kg, rng, &kernel_split_state.sd[ray_index], throughput, state, L, ray)) { ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); enqueue_flag = 1; } } - return enqueue_flag; +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h new file mode 100644 index 00000000000..fe3c9e1e8a2 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -0,0 +1,100 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* This kernel initializes structures needed in path-iteration kernels. 
+ * This is the first kernel in ray-tracing logic. + * + * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE + */ + +ccl_device void kernel_path_init(KernelGlobals *kg) { + int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); + + /* This is the first assignment to ray_state; + * So we dont use ASSIGN_RAY_STATE macro. + */ + kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; + + unsigned int my_sample; + unsigned int pixel_x; + unsigned int pixel_y; + unsigned int tile_x; + unsigned int tile_y; + + unsigned int work_index = 0; + /* Get work. */ + if(!get_next_work(kg, &work_index, ray_index)) { + /* No more work, mark ray as inactive */ + kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; + + return; + } + + /* Get the sample associated with the work. */ + my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + + /* Get pixel and tile position associated with the work. */ + get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, + &tile_x, &tile_y, + work_index, + ray_index); + kernel_split_state.work_array[ray_index] = work_index; + + ccl_global uint *rng_state = kernel_split_params.rng_state; + rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride; + + ccl_global float *buffer = kernel_split_params.buffer; + buffer += (kernel_split_params.offset + pixel_x + pixel_y * kernel_split_params.stride) * kernel_data.film.pass_stride; + + /* Initialize random numbers and ray. */ + kernel_path_trace_setup(kg, + rng_state, + my_sample, + pixel_x, pixel_y, + &kernel_split_state.rng[ray_index], + &kernel_split_state.ray[ray_index]); + + if(kernel_split_state.ray[ray_index].t != 0.0f) { + /* Initialize throughput, L_transparent, Ray, PathState; + * These rays proceed with path-iteration. 
+ */ + kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); + kernel_split_state.L_transparent[ray_index] = 0.0f; + path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); + path_state_init(kg, + &kernel_split_state.sd_DL_shadow[ray_index], + &kernel_split_state.path_state[ray_index], + &kernel_split_state.rng[ray_index], + my_sample, + &kernel_split_state.ray[ray_index]); +#ifdef __KERNEL_DEBUG__ + debug_data_init(&kernel_split_state.debug_data[ray_index]); +#endif + } + else { + /* These rays do not participate in path-iteration. */ + float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + /* Accumulate result in output buffer. */ + kernel_write_pass_float4(buffer, my_sample, L_rad); + path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]); + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h new file mode 100644 index 00000000000..66aad705bd4 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -0,0 +1,102 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* + * The kernel "kernel_queue_enqueue" enqueues rays of + * different ray state into their appropriate Queues; + * 1. 
Rays that have been determined to hit the background from the + * "kernel_scene_intersect" kernel + * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS. + * + * The input and output of the kernel is as follows, + * + * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | + * queuesize -------------------------------------------| | + * + * Note on Queues : + * State of queues during the first time this kernel is called : + * At entry, + * Both QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays + * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_HIT_BACKGROUND rays. + * + * State of queue during other times this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be empty. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will contain RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. 
+ */ +ccl_device void kernel_queue_enqueue(KernelGlobals *kg) +{ + /* We have only 2 cases (Hit/Not-Hit) */ + ccl_local unsigned int local_queue_atomics[2]; + + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + if(lidx == 0) { + local_queue_atomics[0] = 0; + local_queue_atomics[1] = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int queue_number = -1; + + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + } + else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; + } + + unsigned int my_lqidx; + if(queue_number != -1) { + my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + if(lidx == 0) { + local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = + get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, + local_queue_atomics, + kernel_split_params.queue_index); + local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = + get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + local_queue_atomics, + kernel_split_params.queue_index); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + unsigned int my_gqidx; + if(queue_number != -1) { + my_gqidx = get_global_queue_index(queue_number, + kernel_split_params.queue_size, + my_lqidx, + local_queue_atomics); + kernel_split_state.queue_data[my_gqidx] = ray_index; + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index 2388580051f..a7e0c7692a2 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_scene_intersect kernel. 
* This is the second kernel in the ray tracing logic. This is the first @@ -61,34 +61,41 @@ * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS - no change */ -ccl_device void kernel_scene_intersect( - KernelGlobals *kg, - ccl_global uint *rng_coop, - ccl_global Ray *Ray_coop, /* Required for scene_intersect */ - ccl_global PathState *PathState_coop, /* Required for scene_intersect */ - Intersection *Intersection_coop, /* Required for scene_intersect */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int sw, int sh, - ccl_global char *use_queues_flag, /* used to decide if this kernel should use - * queues to fetch ray index */ -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int ray_index) +ccl_device void kernel_scene_intersect(KernelGlobals *kg) { + /* Fetch use_queues_flag */ + ccl_local char local_use_queues_flag; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_use_queues_flag = *kernel_split_params.use_queues_flag; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(local_use_queues_flag) { + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } + /* All regenerated rays become active here */ - if(IS_STATE(ray_state, ray_index, RAY_REGENERATED)) - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE); + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); - if(!IS_STATE(ray_state, ray_index, RAY_ACTIVE)) + if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) return; #ifdef __KERNEL_DEBUG__ - DebugData *debug_data = &debugdata_coop[ray_index]; + DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; #endif - Intersection *isect = &Intersection_coop[ray_index]; - PathState state = 
PathState_coop[ray_index]; - Ray ray = Ray_coop[ray_index]; + Intersection *isect = &kernel_split_state.isect[ray_index]; + PathState state = kernel_split_state.path_state[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; /* intersect scene */ uint visibility = path_state_ray_visibility(kg, &state); @@ -96,7 +103,7 @@ ccl_device void kernel_scene_intersect( #ifdef __HAIR__ float difl = 0.0f, extmax = 0.0f; uint lcg_state = 0; - RNG rng = rng_coop[ray_index]; + RNG rng = kernel_split_state.rng[ray_index]; if(kernel_data.bvh.have_curves) { if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) { @@ -128,6 +135,9 @@ ccl_device void kernel_scene_intersect( * These rays undergo special processing in the * background_bufferUpdate kernel. */ - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND); } } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index cef64bf5f36..35ee19ddf1b 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_shader_eval kernel * This kernel is the 5th kernel in the ray tracing logic. 
This is @@ -44,27 +44,51 @@ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays */ -ccl_device void kernel_shader_eval( - KernelGlobals *kg, - ShaderData *sd, /* Output ShaderData structure to be filled */ - ccl_global uint *rng_coop, /* Required for rbsdf calculation */ - ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ - ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ - Intersection *Intersection_coop, /* Required for setting up shader from ray */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int ray_index) + +ccl_device void kernel_shader_eval(KernelGlobals *kg) { - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - Intersection *isect = &Intersection_coop[ray_index]; - ccl_global uint *rng = &rng_coop[ray_index]; - ccl_global PathState *state = &PathState_coop[ray_index]; - Ray ray = Ray_coop[ray_index]; + /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + + char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); + + /* Continue on with shader evaluation. 
*/ + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + Intersection *isect = &kernel_split_state.isect[ray_index]; + ccl_global uint *rng = &kernel_split_state.rng[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; shader_setup_from_ray(kg, - sd, + &kernel_split_state.sd[ray_index], isect, &ray); float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF); - shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN); + shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN); } } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h index 6153af47f96..d532c7cf55b 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "kernel_split_common.h" +CCL_NAMESPACE_BEGIN /* Note on kernel_shadow_blocked kernel. * This is the ninth kernel in the ray tracing logic. This is the eighth @@ -45,24 +45,47 @@ * and RAY_SHADOW_RAY_CAST_DL respectively, during kernel entry. * QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit. 
*/ -ccl_device void kernel_shadow_blocked( - KernelGlobals *kg, - ccl_global PathState *PathState_coop, /* Required for shadow blocked */ - ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ - ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ - ccl_global char *ray_state, - char shadow_blocked_type, - int ray_index) +ccl_device void kernel_shadow_blocked(KernelGlobals *kg) { + int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0); + + ccl_local unsigned int ao_queue_length; + ccl_local unsigned int dl_queue_length; + if(lidx == 0) { + ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; + dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + /* flag determining if the current ray is to process shadow ray for AO or DL */ + char shadow_blocked_type = -1; + + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(thread_index < ao_queue_length + dl_queue_length) { + if(thread_index < ao_queue_length) { + ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, 1); + shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO; + } else { + ray_index = get_ray_index(kg, thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, 1); + shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL; + } + } + + if(ray_index == QUEUE_EMPTY_SLOT) + return; + /* Flag determining if we need to update L. 
*/ char update_path_radiance = 0; - if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || - IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) + if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || + IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) { - ccl_global PathState *state = &PathState_coop[ray_index]; - ccl_global Ray *light_ray_dl_global = &LightRay_dl_coop[ray_index]; - ccl_global Ray *light_ray_ao_global = &LightRay_ao_coop[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ccl_global Ray *light_ray_dl_global = &kernel_split_state.light_ray[ray_index]; + ccl_global Ray *light_ray_ao_global = &kernel_split_state.ao_light_ray[ray_index]; ccl_global Ray *light_ray_global = shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO @@ -71,7 +94,7 @@ ccl_device void kernel_shadow_blocked( float3 shadow; update_path_radiance = !(shadow_blocked(kg, - kg->sd_input, + &kernel_split_state.sd_DL_shadow[thread_index], state, light_ray_global, &shadow)); @@ -83,3 +106,6 @@ ccl_device void kernel_shadow_blocked( light_ray_global->t = update_path_radiance; } } + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 2135ee22b2e..dd0c3f9c941 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -17,11 +17,23 @@ #ifndef __KERNEL_SPLIT_H__ #define __KERNEL_SPLIT_H__ -#include "kernel_compat_opencl.h" #include "kernel_math.h" #include "kernel_types.h" + +#include "kernel_split_data.h" + #include "kernel_globals.h" -#include "kernel_image_opencl.h" + +#ifdef __OSL__ +# include "osl_shader.h" +#endif + +#ifdef __KERNEL_OPENCL__ +# include "kernel_image_opencl.h" +#endif +#ifdef __KERNEL_CPU__ +# include "../kernels/cpu/kernel_cpu_image.h" +#endif #include "util_atomic.h" diff --git a/intern/cycles/kernel/split/kernel_split_data.h 
b/intern/cycles/kernel/split/kernel_split_data.h new file mode 100644 index 00000000000..5380c0c5de6 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -0,0 +1,57 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_SPLIT_DATA_H__ +#define __KERNEL_SPLIT_DATA_H__ + +#include "kernel_split_data_types.h" +#include "kernel_globals.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements) +{ + (void)kg; /* Unused on CPU. */ + + size_t size = 0; +#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16) + size = size SPLIT_DATA_ENTRIES; +#undef SPLIT_DATA_ENTRY + + return size; +} + +ccl_device_inline void split_data_init(KernelGlobals *kg, + ccl_global SplitData *split_data, + size_t num_elements, + ccl_global void *data, + ccl_global char *ray_state) +{ + (void)kg; /* Unused on CPU. 
*/ + + ccl_global char *p = (ccl_global char*)data; + +#define SPLIT_DATA_ENTRY(type, name, num) \ + split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16); + SPLIT_DATA_ENTRIES +#undef SPLIT_DATA_ENTRY + + split_data->ray_state = ray_state; +} + +CCL_NAMESPACE_END + +#endif /* __KERNEL_SPLIT_DATA_H__ */ diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h new file mode 100644 index 00000000000..62e3ea45ae2 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -0,0 +1,109 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __KERNEL_SPLIT_DATA_TYPES_H__ +#define __KERNEL_SPLIT_DATA_TYPES_H__ + +CCL_NAMESPACE_BEGIN + +/* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */ + +typedef struct SplitParams { + int x; + int y; + int w; + int h; + + int offset; + int stride; + + ccl_global uint *rng_state; + + int start_sample; + int end_sample; + + ccl_global unsigned int *work_pools; + unsigned int num_samples; + + ccl_global int *queue_index; + int queue_size; + ccl_global char *use_queues_flag; + + ccl_global float *buffer; +} SplitParams; + +/* Global memory variables [porting]; These memory is used for + * co-operation between different kernels; Data written by one + * kernel will be available to another kernel via this global + * memory. + */ + +/* SPLIT_DATA_ENTRY(type, name, num) */ + +#if defined(WITH_CYCLES_DEBUG) || defined(__KERNEL_DEBUG__) +/* DebugData memory */ +# define SPLIT_DATA_DEBUG_ENTRIES \ + SPLIT_DATA_ENTRY(DebugData, debug_data, 1) +#else +# define SPLIT_DATA_DEBUG_ENTRIES +#endif + +#define SPLIT_DATA_ENTRIES \ + SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \ + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \ + SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \ + SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? 
*/ \ + SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \ + SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ + SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 2) \ + SPLIT_DATA_DEBUG_ENTRIES \ + +/* struct that holds pointers to data in the shared state buffer */ +typedef struct SplitData { +#define SPLIT_DATA_ENTRY(type, name, num) type *name; + SPLIT_DATA_ENTRIES +#undef SPLIT_DATA_ENTRY + + /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from + * the host easily) but is still used the same as the other data so we have it here in this struct as well + */ + ccl_global char *ray_state; +} SplitData; + +#ifndef __KERNEL_CUDA__ +# define kernel_split_state (kg->split_data) +# define kernel_split_params (kg->split_param_data) +#else +__device__ SplitData __split_data; +# define kernel_split_state (__split_data) +__device__ SplitParams __split_param_data; +# define kernel_split_params (__split_param_data) +#endif /* __KERNEL_CUDA__ */ + +CCL_NAMESPACE_END + +#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ diff --git a/intern/cycles/kernel/split/kernel_sum_all_radiance.h b/intern/cycles/kernel/split/kernel_sum_all_radiance.h deleted file mode 100644 index a21e9b6a0b1..00000000000 --- a/intern/cycles/kernel/split/kernel_sum_all_radiance.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../kernel_compat_opencl.h" -#include "../kernel_math.h" -#include "../kernel_types.h" -#include "../kernel_globals.h" - -/* Since we process various samples in parallel; The output radiance of different samples - * are stored in different locations; This kernel combines the output radiance contributed - * by all different samples and stores them in the RenderTile's output buffer. - */ -ccl_device void kernel_sum_all_radiance( - ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ - ccl_global float *buffer, /* Output buffer of RenderTile */ - ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ - int parallel_samples, int sw, int sh, int stride, - int buffer_offset_x, - int buffer_offset_y, - int buffer_stride, - int start_sample) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < sw && y < sh) { - buffer += ((buffer_offset_x + x) + (buffer_offset_y + y) * buffer_stride) * (data->film.pass_stride); - per_sample_output_buffer += ((x + y * stride) * parallel_samples) * (data->film.pass_stride); - - int sample_stride = (data->film.pass_stride); - - int sample_iterator = 0; - int pass_stride_iterator = 0; - int num_floats = data->film.pass_stride; - - for(sample_iterator = 0; sample_iterator < parallel_samples; sample_iterator++) { - for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) { - *(buffer + pass_stride_iterator) = - (start_sample == 0 && sample_iterator == 0) - ? 
*(per_sample_output_buffer + pass_stride_iterator) - : *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator); - } - per_sample_output_buffer += sample_stride; - } - } -} diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 88ec7fe6fcc..57ec9f94a3d 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -192,7 +192,7 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderType type, int path_flag) { float stack[SVM_STACK_SIZE]; - int offset = ccl_fetch(sd, shader) & SHADER_MASK; + int offset = sd->shader & SHADER_MASK; while(1) { uint4 node = read_node(kg, &offset); diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 0e55c99ae97..229a3f20421 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -27,7 +27,7 @@ ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData AttributeDescriptor desc; - if(ccl_fetch(sd, object) != OBJECT_NONE) { + if(sd->object != OBJECT_NONE) { desc = find_attribute(kg, sd, node.y); if(desc.offset == ATTR_STD_NOT_FOUND) { desc = attribute_not_found(); diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h index 04a8c7b64e5..610d9af9e1f 100644 --- a/intern/cycles/kernel/svm/svm_bump.h +++ b/intern/cycles/kernel/svm/svm_bump.h @@ -21,9 +21,9 @@ CCL_NAMESPACE_BEGIN ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset) { /* save state */ - stack_store_float3(stack, offset+0, ccl_fetch(sd, P)); - stack_store_float3(stack, offset+3, ccl_fetch(sd, dP).dx); - stack_store_float3(stack, offset+6, ccl_fetch(sd, dP).dy); + stack_store_float3(stack, offset+0, sd->P); + stack_store_float3(stack, offset+3, sd->dP.dx); + stack_store_float3(stack, offset+6, sd->dP.dy); /* set state as if 
undisplaced */ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); @@ -36,18 +36,18 @@ ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, floa object_dir_transform(kg, sd, &dPdx); object_dir_transform(kg, sd, &dPdy); - ccl_fetch(sd, P) = P; - ccl_fetch(sd, dP).dx = dPdx; - ccl_fetch(sd, dP).dy = dPdy; + sd->P = P; + sd->dP.dx = dPdx; + sd->dP.dy = dPdy; } } ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset) { /* restore state */ - ccl_fetch(sd, P) = stack_load_float3(stack, offset+0); - ccl_fetch(sd, dP).dx = stack_load_float3(stack, offset+3); - ccl_fetch(sd, dP).dy = stack_load_float3(stack, offset+6); + sd->P = stack_load_float3(stack, offset+0); + sd->dP.dx = stack_load_float3(stack, offset+3); + sd->dP.dy = stack_load_float3(stack, offset+6); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h index 00678a49d70..90249dfd978 100644 --- a/intern/cycles/kernel/svm/svm_camera.h +++ b/intern/cycles/kernel/svm/svm_camera.h @@ -23,7 +23,7 @@ ccl_device void svm_node_camera(KernelGlobals *kg, ShaderData *sd, float *stack, float3 vector; Transform tfm = kernel_data.cam.worldtocamera; - vector = transform_point(&tfm, ccl_fetch(sd, P)); + vector = transform_point(&tfm, sd->P); zdepth = vector.z; distance = len(vector); diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 017d697f9f8..1885e1af851 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -25,13 +25,13 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t bsdf->alpha_y = 0.0f; bsdf->alpha_x = 0.0f; bsdf->ior = eta; - ccl_fetch(sd, flag) |= bsdf_refraction_setup(bsdf); + sd->flag |= bsdf_refraction_setup(bsdf); } else { bsdf->alpha_y = 0.0f; bsdf->alpha_x = 0.0f; bsdf->ior = 0.0f; - ccl_fetch(sd, flag) |= 
bsdf_reflection_setup(bsdf); + sd->flag |= bsdf_reflection_setup(bsdf); } } else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { @@ -40,9 +40,9 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t bsdf->ior = eta; if(refract) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(bsdf); + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); } else { bsdf->alpha_x = roughness; @@ -50,9 +50,9 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t bsdf->ior = eta; if(refract) - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); } } @@ -70,14 +70,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(mix_weight == 0.0f) return; - float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): ccl_fetch(sd, N); + float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); float param2 = (stack_valid(param2_offset))? 
stack_load_float(stack, param2_offset): __uint_as_float(node.w); switch(type) { case CLOSURE_BSDF_DIFFUSE_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; OrenNayarBsdf *bsdf = (OrenNayarBsdf*)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight); if(bsdf) { @@ -86,31 +86,31 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float roughness = param1; if(roughness == 0.0f) { - ccl_fetch(sd, flag) |= bsdf_diffuse_setup((DiffuseBsdf*)bsdf); + sd->flag |= bsdf_diffuse_setup((DiffuseBsdf*)bsdf); } else { bsdf->roughness = roughness; - ccl_fetch(sd, flag) |= bsdf_oren_nayar_setup(bsdf); + sd->flag |= bsdf_oren_nayar_setup(bsdf); } } break; } case CLOSURE_BSDF_TRANSLUCENT_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); if(bsdf) { bsdf->N = N; - ccl_fetch(sd, flag) |= bsdf_translucent_setup(bsdf); + sd->flag |= bsdf_translucent_setup(bsdf); } break; } case CLOSURE_BSDF_TRANSPARENT_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; ShaderClosure *bsdf = bsdf_alloc(sd, sizeof(ShaderClosure), weight); if(bsdf) { - ccl_fetch(sd, flag) |= bsdf_transparent_setup(bsdf); + sd->flag |= bsdf_transparent_setup(bsdf); } break; } @@ -123,7 +123,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); if(bsdf) { @@ -135,21 +135,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * /* setup bsdf 
*/ if(type == CLOSURE_BSDF_REFLECTION_ID) - ccl_fetch(sd, flag) |= bsdf_reflection_setup(bsdf); + sd->flag |= bsdf_reflection_setup(bsdf); else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_setup(bsdf); + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { kernel_assert(stack_valid(data_node.z)); bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); if(bsdf->extra) { bsdf->extra->color = stack_load_float3(stack, data_node.z); - ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); } } else - ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_setup(bsdf); + sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); } break; @@ -161,7 +161,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); if(bsdf) { @@ -169,7 +169,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->extra = NULL; float eta = fmaxf(param2, 1e-5f); - eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; + eta = (sd->flag & SD_BACKFACING)? 
1.0f/eta: eta; /* setup bsdf */ if(type == CLOSURE_BSDF_REFRACTION_ID) { @@ -177,7 +177,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->alpha_y = 0.0f; bsdf->ior = eta; - ccl_fetch(sd, flag) |= bsdf_refraction_setup(bsdf); + sd->flag |= bsdf_refraction_setup(bsdf); } else { bsdf->alpha_x = param1; @@ -185,9 +185,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->ior = eta; if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_refraction_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); } } @@ -203,14 +203,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * break; } #endif - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; /* index of refraction */ float eta = fmaxf(param2, 1e-5f); - eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; + eta = (sd->flag & SD_BACKFACING)? 
1.0f/eta: eta; /* fresnel */ - float cosNO = dot(N, ccl_fetch(sd, I)); + float cosNO = dot(N, sd->I); float fresnel = fresnel_dielectric_cos(cosNO, eta); float roughness = param1; @@ -249,7 +249,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_reflective && !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); @@ -261,13 +261,13 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->alpha_x = param1; bsdf->alpha_y = param1; float eta = fmaxf(param2, 1e-5f); - bsdf->ior = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; + bsdf->ior = (sd->flag & SD_BACKFACING)? 
1.0f/eta: eta; kernel_assert(stack_valid(data_node.z)); bsdf->extra->color = stack_load_float3(stack, data_node.z); /* setup bsdf */ - ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); } break; @@ -280,7 +280,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) break; #endif - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); if(bsdf) { @@ -310,33 +310,33 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->ior = 0.0f; if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) { - ccl_fetch(sd, flag) |= bsdf_microfacet_beckmann_aniso_setup(bsdf); + sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf); } else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) { - ccl_fetch(sd, flag) |= bsdf_microfacet_ggx_aniso_setup(bsdf); + sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf); } else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) { kernel_assert(stack_valid(data_node.w)); bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); if(bsdf->extra) { bsdf->extra->color = stack_load_float3(stack, data_node.w); - ccl_fetch(sd, flag) |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); + sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); } } else - ccl_fetch(sd, flag) |= bsdf_ashikhmin_shirley_aniso_setup(bsdf); + sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf); } break; } case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; VelvetBsdf *bsdf = (VelvetBsdf*)bsdf_alloc(sd, sizeof(VelvetBsdf), weight); if(bsdf) { bsdf->N = N; bsdf->sigma = saturate(param1); - 
ccl_fetch(sd, flag) |= bsdf_ashikhmin_velvet_setup(bsdf); + sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); } break; } @@ -346,7 +346,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * break; #endif case CLOSURE_BSDF_DIFFUSE_TOON_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; ToonBsdf *bsdf = (ToonBsdf*)bsdf_alloc(sd, sizeof(ToonBsdf), weight); if(bsdf) { @@ -355,18 +355,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bsdf->smooth = param2; if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID) - ccl_fetch(sd, flag) |= bsdf_diffuse_toon_setup(bsdf); + sd->flag |= bsdf_diffuse_toon_setup(bsdf); else - ccl_fetch(sd, flag) |= bsdf_glossy_toon_setup(bsdf); + sd->flag |= bsdf_glossy_toon_setup(bsdf); } break; } #ifdef __HAIR__ case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 weight = sd->svm_closure_weight * mix_weight; - if(ccl_fetch(sd, flag) & SD_BACKFACING && ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { + if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) { ShaderClosure *bsdf = bsdf_alloc(sd, sizeof(ShaderClosure), weight); if(bsdf) { @@ -376,7 +376,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * * better figure out a way to skip backfaces from rays * spawned by transmission from the front */ bsdf->weight = make_float3(1.0f, 1.0f, 1.0f); - ccl_fetch(sd, flag) |= bsdf_transparent_setup(bsdf); + sd->flag |= bsdf_transparent_setup(bsdf); } } else { @@ -390,18 +390,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(stack_valid(data_node.y)) { bsdf->T = normalize(stack_load_float3(stack, data_node.y)); } - else if(!(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE)) { - bsdf->T = normalize(ccl_fetch(sd, dPdv)); + else if(!(sd->type & 
PRIMITIVE_ALL_CURVE)) { + bsdf->T = normalize(sd->dPdv); bsdf->offset = 0.0f; } else - bsdf->T = normalize(ccl_fetch(sd, dPdu)); + bsdf->T = normalize(sd->dPdu); if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { - ccl_fetch(sd, flag) |= bsdf_hair_reflection_setup(bsdf); + sd->flag |= bsdf_hair_reflection_setup(bsdf); } else { - ccl_fetch(sd, flag) |= bsdf_hair_transmission_setup(bsdf); + sd->flag |= bsdf_hair_transmission_setup(bsdf); } } } @@ -414,8 +414,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSSRDF_CUBIC_ID: case CLOSURE_BSSRDF_GAUSSIAN_ID: case CLOSURE_BSSRDF_BURLEY_ID: { - float3 albedo = ccl_fetch(sd, svm_closure_weight); - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight; + float3 albedo = sd->svm_closure_weight; + float3 weight = sd->svm_closure_weight * mix_weight; float sample_weight = fabsf(average(weight)); /* disable in case of diffuse ancestor, can't see it well then and @@ -441,7 +441,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->albedo = albedo.x; bssrdf->sharpness = sharpness; bssrdf->N = N; - ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + sd->flag |= bssrdf_setup(bssrdf, (ClosureType)type); } bssrdf = bssrdf_alloc(sd, make_float3(0.0f, weight.y, 0.0f)); @@ -452,7 +452,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->albedo = albedo.y; bssrdf->sharpness = sharpness; bssrdf->N = N; - ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + sd->flag |= bssrdf_setup(bssrdf, (ClosureType)type); } bssrdf = bssrdf_alloc(sd, make_float3(0.0f, 0.0f, weight.z)); @@ -463,7 +463,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * bssrdf->albedo = albedo.z; bssrdf->sharpness = sharpness; bssrdf->N = N; - ccl_fetch(sd, flag) |= bssrdf_setup(bssrdf, (ClosureType)type); + sd->flag |= bssrdf_setup(bssrdf, (ClosureType)type); } } @@ -493,21 
+493,21 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float switch(type) { case CLOSURE_VOLUME_ABSORPTION_ID: { - float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - ccl_fetch(sd, svm_closure_weight)) * mix_weight * density; + float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - sd->svm_closure_weight) * mix_weight * density; ShaderClosure *sc = closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_NONE_ID, weight); if(sc) { - ccl_fetch(sd, flag) |= volume_absorption_setup(sc); + sd->flag |= volume_absorption_setup(sc); } break; } case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: { - float3 weight = ccl_fetch(sd, svm_closure_weight) * mix_weight * density; + float3 weight = sd->svm_closure_weight * mix_weight * density; HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), weight); if(volume) { volume->g = param2; /* g */ - ccl_fetch(sd, flag) |= volume_henyey_greenstein_setup(volume); + sd->flag |= volume_henyey_greenstein_setup(volume); } break; } @@ -527,12 +527,12 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no if(mix_weight == 0.0f) return; - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, sd->svm_closure_weight * mix_weight); } else - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, ccl_fetch(sd, svm_closure_weight)); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_EMISSION_ID, sd->svm_closure_weight); - ccl_fetch(sd, flag) |= SD_EMISSION; + sd->flag |= SD_EMISSION; } ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node) @@ -545,10 +545,10 @@ ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 if(mix_weight == 0.0f) return; - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); + closure_alloc(sd, 
sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, sd->svm_closure_weight * mix_weight); } else - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, ccl_fetch(sd, svm_closure_weight)); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BACKGROUND_ID, sd->svm_closure_weight); } ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node) @@ -561,12 +561,12 @@ ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 nod if(mix_weight == 0.0f) return; - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight); } else - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, ccl_fetch(sd, svm_closure_weight)); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight); - ccl_fetch(sd, flag) |= SD_HOLDOUT; + sd->flag |= SD_HOLDOUT; } ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, uint4 node) @@ -579,19 +579,19 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, if(mix_weight == 0.0f) return; - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, ccl_fetch(sd, svm_closure_weight) * mix_weight); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, sd->svm_closure_weight * mix_weight); } else - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, ccl_fetch(sd, svm_closure_weight)); + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_AMBIENT_OCCLUSION_ID, sd->svm_closure_weight); - ccl_fetch(sd, flag) |= SD_AO; + sd->flag |= SD_AO; } /* Closure Nodes */ ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight) { - ccl_fetch(sd, svm_closure_weight) = weight; + sd->svm_closure_weight = weight; } ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) @@ -641,7 +641,7 @@ 
ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) ccl_device void svm_node_set_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal) { float3 normal = stack_load_float3(stack, in_direction); - ccl_fetch(sd, N) = normal; + sd->N = normal; stack_store_float3(stack, out_normal, normal); } diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index 890ab41aaaa..c94fa130af7 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -25,10 +25,10 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac uint normal_offset, distance_offset, invert, use_object_space; decode_node_uchar4(node.y, &normal_offset, &distance_offset, &invert, &use_object_space); - float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N); + float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; - float3 dPdx = ccl_fetch(sd, dP).dx; - float3 dPdy = ccl_fetch(sd, dP).dy; + float3 dPdx = sd->dP.dx; + float3 dPdy = sd->dP.dy; if(use_object_space) { object_inverse_normal_transform(kg, sd, &normal_in); @@ -80,14 +80,14 @@ ccl_device void svm_node_set_displacement(KernelGlobals *kg, ShaderData *sd, flo { float d = stack_load_float(stack, fac_offset); - float3 dP = ccl_fetch(sd, N); + float3 dP = sd->N; object_inverse_normal_transform(kg, sd, &dP); dP *= d*0.1f; /* todo: get rid of this factor */ object_dir_transform(kg, sd, &dP); - ccl_fetch(sd, P) += dP; + sd->P += dP; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h index 23c97d80cb0..3703ec55015 100644 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ b/intern/cycles/kernel/svm/svm_fresnel.h @@ -23,12 +23,12 @@ ccl_device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint normal_offset, out_offset; 
decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL); float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value); - float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N); + float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; eta = fmaxf(eta, 1e-5f); - eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f/eta: eta; + eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; - float f = fresnel_dielectric_cos(dot(ccl_fetch(sd, I), normal_in), eta); + float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); stack_store_float(stack, out_offset, f); } @@ -44,18 +44,18 @@ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL); float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value); - float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): ccl_fetch(sd, N); + float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): sd->N; float f; if(type == NODE_LAYER_WEIGHT_FRESNEL) { float eta = fmaxf(1.0f - blend, 1e-5f); - eta = (ccl_fetch(sd, flag) & SD_BACKFACING)? eta: 1.0f/eta; + eta = (sd->flag & SD_BACKFACING)? 
eta: 1.0f/eta; - f = fresnel_dielectric_cos(dot(ccl_fetch(sd, I), normal_in), eta); + f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); } else { - f = fabsf(dot(ccl_fetch(sd, I), normal_in)); + f = fabsf(dot(sd->I, normal_in)); if(blend != 0.5f) { blend = clamp(blend, 0.0f, 1.0f-1e-5f); diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index 7d512f7ff4d..4a09d9f6653 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -27,15 +27,15 @@ ccl_device_inline void svm_node_geometry(KernelGlobals *kg, float3 data; switch(type) { - case NODE_GEOM_P: data = ccl_fetch(sd, P); break; - case NODE_GEOM_N: data = ccl_fetch(sd, N); break; + case NODE_GEOM_P: data = sd->P; break; + case NODE_GEOM_N: data = sd->N; break; #ifdef __DPDU__ case NODE_GEOM_T: data = primitive_tangent(kg, sd); break; #endif - case NODE_GEOM_I: data = ccl_fetch(sd, I); break; - case NODE_GEOM_Ng: data = ccl_fetch(sd, Ng); break; + case NODE_GEOM_I: data = sd->I; break; + case NODE_GEOM_Ng: data = sd->Ng; break; #ifdef __UV__ - case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u), ccl_fetch(sd, v), 0.0f); break; + case NODE_GEOM_uv: data = make_float3(sd->u, sd->v, 0.0f); break; #endif } @@ -48,8 +48,8 @@ ccl_device void svm_node_geometry_bump_dx(KernelGlobals *kg, ShaderData *sd, flo float3 data; switch(type) { - case NODE_GEOM_P: data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; break; - case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u) + ccl_fetch(sd, du).dx, ccl_fetch(sd, v) + ccl_fetch(sd, dv).dx, 0.0f); break; + case NODE_GEOM_P: data = sd->P + sd->dP.dx; break; + case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); break; default: svm_node_geometry(kg, sd, stack, type, out_offset); return; } @@ -65,8 +65,8 @@ ccl_device void svm_node_geometry_bump_dy(KernelGlobals *kg, ShaderData *sd, flo float3 data; switch(type) { - case NODE_GEOM_P: data = ccl_fetch(sd, P) + 
ccl_fetch(sd, dP).dy; break; - case NODE_GEOM_uv: data = make_float3(ccl_fetch(sd, u) + ccl_fetch(sd, du).dy, ccl_fetch(sd, v) + ccl_fetch(sd, dv).dy, 0.0f); break; + case NODE_GEOM_P: data = sd->P + sd->dP.dy; break; + case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); break; default: svm_node_geometry(kg, sd, stack, type, out_offset); return; } @@ -87,9 +87,9 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s stack_store_float3(stack, out_offset, object_location(kg, sd)); return; } - case NODE_INFO_OB_INDEX: data = object_pass_id(kg, ccl_fetch(sd, object)); break; + case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break; case NODE_INFO_MAT_INDEX: data = shader_pass_id(kg, sd); break; - case NODE_INFO_OB_RANDOM: data = object_random_number(kg, ccl_fetch(sd, object)); break; + case NODE_INFO_OB_RANDOM: data = object_random_number(kg, sd->object); break; default: data = 0.0f; break; } @@ -106,44 +106,44 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, { switch(type) { case NODE_INFO_PAR_INDEX: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_index(kg, particle_id)); break; } case NODE_INFO_PAR_AGE: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_age(kg, particle_id)); break; } case NODE_INFO_PAR_LIFETIME: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); break; } case NODE_INFO_PAR_LOCATION: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); 
break; } #if 0 /* XXX float4 currently not supported in SVM stack */ case NODE_INFO_PAR_ROTATION: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); break; } #endif case NODE_INFO_PAR_SIZE: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_size(kg, particle_id)); break; } case NODE_INFO_PAR_VELOCITY: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); break; } case NODE_INFO_PAR_ANGULAR_VELOCITY: { - int particle_id = object_particle_id(kg, ccl_fetch(sd, object)); + int particle_id = object_particle_id(kg, sd->object); stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); break; } @@ -165,7 +165,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, switch(type) { case NODE_INFO_CURVE_IS_STRAND: { - data = (ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) != 0; + data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; stack_store_float(stack, out_offset, data); break; } @@ -177,7 +177,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, break; } /*case NODE_INFO_CURVE_FADE: { - data = ccl_fetch(sd, curve_transparency); + data = sd->curve_transparency; stack_store_float(stack, out_offset, data); break; }*/ diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 2afdf61b476..76acc9253a1 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -144,7 +144,6 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 86: r = kernel_tex_image_interp(__tex_image_byte4_086, x, y); break; case 87: r = 
kernel_tex_image_interp(__tex_image_byte4_087, x, y); break; case 88: r = kernel_tex_image_interp(__tex_image_byte4_088, x, y); break; - case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break; default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); @@ -238,9 +237,9 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { /* get object space normal */ - float3 N = ccl_fetch(sd, N); + float3 N = sd->N; - N = ccl_fetch(sd, N); + N = sd->N; object_inverse_normal_transform(kg, sd, &N); /* project from direction vector to barycentric coordinates in triangles */ diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index 04f6f623f18..1492e358608 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -31,8 +31,8 @@ ccl_device void svm_node_light_path(ShaderData *sd, ccl_addr_space PathState *st case NODE_LP_reflection: info = (path_flag & PATH_RAY_REFLECT)? 1.0f: 0.0f; break; case NODE_LP_transmission: info = (path_flag & PATH_RAY_TRANSMIT)? 1.0f: 0.0f; break; case NODE_LP_volume_scatter: info = (path_flag & PATH_RAY_VOLUME_SCATTER)? 1.0f: 0.0f; break; - case NODE_LP_backfacing: info = (ccl_fetch(sd, flag) & SD_BACKFACING)? 1.0f: 0.0f; break; - case NODE_LP_ray_length: info = ccl_fetch(sd, ray_length); break; + case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 
1.0f: 0.0f; break; + case NODE_LP_ray_length: info = sd->ray_length; break; case NODE_LP_ray_depth: info = (float)state->bounce; break; case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break; case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break; @@ -56,14 +56,14 @@ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node) switch(type) { case NODE_LIGHT_FALLOFF_QUADRATIC: break; - case NODE_LIGHT_FALLOFF_LINEAR: strength *= ccl_fetch(sd, ray_length); break; - case NODE_LIGHT_FALLOFF_CONSTANT: strength *= ccl_fetch(sd, ray_length)*ccl_fetch(sd, ray_length); break; + case NODE_LIGHT_FALLOFF_LINEAR: strength *= sd->ray_length; break; + case NODE_LIGHT_FALLOFF_CONSTANT: strength *= sd->ray_length*sd->ray_length; break; } float smooth = stack_load_float(stack, smooth_offset); if(smooth > 0.0f) { - float squared = ccl_fetch(sd, ray_length)*ccl_fetch(sd, ray_length); + float squared = sd->ray_length*sd->ray_length; /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. 
*/ if(isfinite(squared)) { strength *= squared/(smooth + squared); diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index 62ff38cf1c5..0347ab7b193 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -18,50 +18,42 @@ CCL_NAMESPACE_BEGIN /* Noise */ -ccl_device_inline void svm_noise(float3 p, float detail, float distortion, float *fac, float3 *color) -{ - int hard = 0; - - if(distortion != 0.0f) { - float3 r, offset = make_float3(13.5f, 13.5f, 13.5f); - - r.x = noise(p + offset) * distortion; - r.y = noise(p) * distortion; - r.z = noise(p - offset) * distortion; - - p += r; - } - - *fac = noise_turbulence(p, detail, hard); - *color = make_float3(*fac, - noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard), - noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard)); -} - ccl_device void svm_node_tex_noise(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset; decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset); + decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); uint4 node2 = read_node(kg, offset); float scale = stack_load_float_default(stack, scale_offset, node2.x); float detail = stack_load_float_default(stack, detail_offset, node2.y); float distortion = stack_load_float_default(stack, distortion_offset, node2.z); - float3 co = stack_load_float3(stack, co_offset); + float3 p = stack_load_float3(stack, co_offset) * scale; + int hard = 0; - float3 color; - float f; + if(distortion != 0.0f) { + float3 r, offset = make_float3(13.5f, 13.5f, 13.5f); + + r.x = noise(p + offset) * distortion; + r.y = noise(p) * distortion; + r.z = noise(p - offset) * distortion; - svm_noise(co*scale, detail, distortion, &f, &color); + p += r; + } - decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); + float 
f = noise_turbulence(p, detail, hard); - if(stack_valid(fac_offset)) + if(stack_valid(fac_offset)) { stack_store_float(stack, fac_offset, f); - if(stack_valid(color_offset)) + } + if(stack_valid(color_offset)) { + float3 color = make_float3(f, + noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard), + noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard)); stack_store_float3(stack, color_offset, color); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index c0b01262212..c94327401f5 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -31,9 +31,9 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, switch(type) { case NODE_TEXCO_OBJECT: { - data = ccl_fetch(sd, P); + data = sd->P; if(node.w == 0) { - if(ccl_fetch(sd, object) != OBJECT_NONE) { + if(sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); } } @@ -48,47 +48,47 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, break; } case NODE_TEXCO_NORMAL: { - data = ccl_fetch(sd, N); + data = sd->N; object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(ccl_fetch(sd, object) != OBJECT_NONE) - data = transform_point(&tfm, ccl_fetch(sd, P)); + if(sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P); else - data = transform_point(&tfm, ccl_fetch(sd, P) + camera_position(kg)); + data = transform_point(&tfm, sd->P + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P)); + if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P); else - data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, 
P)); + data = camera_world_to_ndc(kg, sd, sd->P); data.z = 0.0f; break; } case NODE_TEXCO_REFLECTION: { - if(ccl_fetch(sd, object) != OBJECT_NONE) - data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I); + if(sd->object != OBJECT_NONE) + data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; else - data = ccl_fetch(sd, I); + data = sd->I; break; } case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, ccl_fetch(sd, object)); + data = object_dupli_generated(kg, sd->object); break; } case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, ccl_fetch(sd, object)); + data = object_dupli_uv(kg, sd->object); break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = ccl_fetch(sd, P); + data = sd->P; #ifdef __VOLUME__ - if(ccl_fetch(sd, object) != OBJECT_NONE) + if(sd->object != OBJECT_NONE) data = volume_normalized_position(kg, sd, data); #endif break; @@ -112,9 +112,9 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, switch(type) { case NODE_TEXCO_OBJECT: { - data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; + data = sd->P + sd->dP.dx; if(node.w == 0) { - if(ccl_fetch(sd, object) != OBJECT_NONE) { + if(sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); } } @@ -129,47 +129,47 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, break; } case NODE_TEXCO_NORMAL: { - data = ccl_fetch(sd, N); + data = sd->N; object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(ccl_fetch(sd, object) != OBJECT_NONE) - data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx); + if(sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P + sd->dP.dx); else - data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx + camera_position(kg)); + data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, 
object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P) + ccl_fetch(sd, ray_dP).dx); + if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx); else - data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx); + data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); data.z = 0.0f; break; } case NODE_TEXCO_REFLECTION: { - if(ccl_fetch(sd, object) != OBJECT_NONE) - data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I); + if(sd->object != OBJECT_NONE) + data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; else - data = ccl_fetch(sd, I); + data = sd->I; break; } case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, ccl_fetch(sd, object)); + data = object_dupli_generated(kg, sd->object); break; } case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, ccl_fetch(sd, object)); + data = object_dupli_uv(kg, sd->object); break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dx; + data = sd->P + sd->dP.dx; #ifdef __VOLUME__ - if(ccl_fetch(sd, object) != OBJECT_NONE) + if(sd->object != OBJECT_NONE) data = volume_normalized_position(kg, sd, data); #endif break; @@ -196,9 +196,9 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, switch(type) { case NODE_TEXCO_OBJECT: { - data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy; + data = sd->P + sd->dP.dy; if(node.w == 0) { - if(ccl_fetch(sd, object) != OBJECT_NONE) { + if(sd->object != OBJECT_NONE) { object_inverse_position_transform(kg, sd, &data); } } @@ -213,47 +213,47 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, break; } case NODE_TEXCO_NORMAL: { - data = ccl_fetch(sd, N); + data = sd->N; object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; 
- if(ccl_fetch(sd, object) != OBJECT_NONE) - data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy); + if(sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P + sd->dP.dy); else - data = transform_point(&tfm, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy + camera_position(kg)); + data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && ccl_fetch(sd, object) == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, ray_P) + ccl_fetch(sd, ray_dP).dy); + if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy); else - data = camera_world_to_ndc(kg, sd, ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy); + data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); data.z = 0.0f; break; } case NODE_TEXCO_REFLECTION: { - if(ccl_fetch(sd, object) != OBJECT_NONE) - data = 2.0f*dot(ccl_fetch(sd, N), ccl_fetch(sd, I))*ccl_fetch(sd, N) - ccl_fetch(sd, I); + if(sd->object != OBJECT_NONE) + data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; else - data = ccl_fetch(sd, I); + data = sd->I; break; } case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, ccl_fetch(sd, object)); + data = object_dupli_generated(kg, sd->object); break; } case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, ccl_fetch(sd, object)); + data = object_dupli_uv(kg, sd->object); break; } case NODE_TEXCO_VOLUME_GENERATED: { - data = ccl_fetch(sd, P) + ccl_fetch(sd, dP).dy; + data = sd->P + sd->dP.dy; #ifdef __VOLUME__ - if(ccl_fetch(sd, object) != OBJECT_NONE) + if(sd->object != OBJECT_NONE) data = volume_normalized_position(kg, sd, data); #endif break; @@ -274,12 +274,12 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st float3 color = stack_load_float3(stack, color_offset); color = 
2.0f*make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); - bool is_backfacing = (ccl_fetch(sd, flag) & SD_BACKFACING) != 0; + bool is_backfacing = (sd->flag & SD_BACKFACING) != 0; float3 N; if(space == NODE_NORMAL_MAP_TANGENT) { /* tangent space */ - if(ccl_fetch(sd, object) == OBJECT_NONE) { + if(sd->object == OBJECT_NONE) { stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); return; } @@ -299,11 +299,11 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL); float3 normal; - if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { + if(sd->shader & SHADER_SMOOTH_NORMAL) { normal = primitive_attribute_float3(kg, sd, attr_normal, NULL, NULL); } else { - normal = ccl_fetch(sd, Ng); + normal = sd->Ng; /* the normal is already inverted, which is too soon for the math here */ if(is_backfacing) { @@ -345,11 +345,11 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st if(strength != 1.0f) { strength = max(strength, 0.0f); - N = safe_normalize(ccl_fetch(sd, N) + (N - ccl_fetch(sd, N))*strength); + N = safe_normalize(sd->N + (N - sd->N)*strength); } if(is_zero(N)) { - N = ccl_fetch(sd, N); + N = sd->N; } stack_store_float3(stack, normal_offset, N); @@ -377,7 +377,7 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack float3 generated; if(desc.offset == ATTR_STD_NOT_FOUND) - generated = ccl_fetch(sd, P); + generated = sd->P; else generated = primitive_attribute_float3(kg, sd, desc, NULL, NULL); @@ -390,7 +390,7 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack } object_normal_transform(kg, sd, &tangent); - tangent = cross(ccl_fetch(sd, N), normalize(cross(tangent, ccl_fetch(sd, N)))); + tangent = cross(sd->N, normalize(cross(tangent, sd->N))); stack_store_float3(stack, tangent_offset, tangent); } diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h 
b/intern/cycles/kernel/svm/svm_vector_transform.h index 4c32130d06d..4e92f27acdb 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -33,7 +33,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; Transform tfm; - bool is_object = (ccl_fetch(sd, object) != OBJECT_NONE); + bool is_object = (sd->object != OBJECT_NONE); bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); /* From world */ diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 87e40791333..3c6353c8001 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -41,9 +41,9 @@ ccl_device_inline float wireframe(KernelGlobals *kg, float3 *P) { #ifdef __HAIR__ - if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) + if(sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) #else - if(ccl_fetch(sd, prim) != PRIM_NONE) + if(sd->prim != PRIM_NONE) #endif { float3 Co[3]; @@ -52,12 +52,12 @@ ccl_device_inline float wireframe(KernelGlobals *kg, /* Triangles */ int np = 3; - if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) - triangle_vertices(kg, ccl_fetch(sd, prim), Co); + if(sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, Co); else - motion_triangle_vertices(kg, ccl_fetch(sd, object), ccl_fetch(sd, prim), ccl_fetch(sd, time), Co); + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); - if(!(ccl_fetch(sd, object_flag) & SD_OBJECT_TRANSFORM_APPLIED)) { + if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { object_position_transform(kg, sd, &Co[0]); object_position_transform(kg, sd, &Co[1]); object_position_transform(kg, sd, &Co[2]); @@ -66,8 +66,8 @@ ccl_device_inline float wireframe(KernelGlobals *kg, if(pixel_size) { // Project the 
derivatives of P to the viewing plane defined // by I so we have a measure of how big is a pixel at this point - float pixelwidth_x = len(ccl_fetch(sd, dP).dx - dot(ccl_fetch(sd, dP).dx, ccl_fetch(sd, I)) * ccl_fetch(sd, I)); - float pixelwidth_y = len(ccl_fetch(sd, dP).dy - dot(ccl_fetch(sd, dP).dy, ccl_fetch(sd, I)) * ccl_fetch(sd, I)); + float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); + float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); // Take the average of both axis' length pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; } @@ -113,20 +113,20 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, * With OpenCL 2.0 it's possible to avoid this change, but for until * then we'll be living with such an exception. */ - float3 P = ccl_fetch(sd, P); + float3 P = sd->P; float f = wireframe(kg, sd, size, pixel_size, &P); #else - float f = wireframe(kg, sd, size, pixel_size, &ccl_fetch(sd, P)); + float f = wireframe(kg, sd, size, pixel_size, &sd->P); #endif /* TODO(sergey): Think of faster way to calculate derivatives. 
*/ if(bump_offset == NODE_BUMP_OFFSET_DX) { - float3 Px = ccl_fetch(sd, P) - ccl_fetch(sd, dP).dx; - f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(ccl_fetch(sd, dP).dx); + float3 Px = sd->P - sd->dP.dx; + f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); } else if(bump_offset == NODE_BUMP_OFFSET_DY) { - float3 Py = ccl_fetch(sd, P) - ccl_fetch(sd, dP).dy; - f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(ccl_fetch(sd, dP).dy); + float3 Py = sd->P - sd->dP.dy; + f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); } if(stack_valid(out_fac)) diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h index 8029c6a9e80..deb22c9c2f2 100644 --- a/intern/cycles/render/background.h +++ b/intern/cycles/render/background.h @@ -30,7 +30,7 @@ class Shader; class Background : public Node { public: - NODE_DECLARE; + NODE_DECLARE float ao_factor; float ao_distance; diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index d9a297002c6..c2f6293a50b 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -171,9 +171,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_alloc("bake_input", d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_READ_WRITE); + device->mem_alloc("bake_output", d_output, MEM_READ_WRITE); DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 25f5eb3c897..aed9c5a8e75 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -73,7 +73,7 @@ public: bool need_update; - int total_pixel_samples; + size_t total_pixel_samples; private: BakeData *m_bake_data; diff --git 
a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index f1692712d61..e3ef4bf13fb 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -129,13 +129,13 @@ void RenderBuffers::reset(Device *device, BufferParams& params_) /* allocate buffer */ buffer.resize(params.width*params.height*params.get_passes_size()); - device->mem_alloc(buffer, MEM_READ_WRITE); + device->mem_alloc("render_buffer", buffer, MEM_READ_WRITE); device->mem_zero(buffer); /* allocate rng state */ rng_state.resize(params.width, params.height); - device->mem_alloc(rng_state, MEM_READ_WRITE); + device->mem_alloc("rng_state", rng_state, MEM_READ_WRITE); } bool RenderBuffers::copy_from_device() diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index 141ef9cccef..655d74e42d8 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -39,7 +39,7 @@ class Scene; class Camera : public Node { public: - NODE_DECLARE; + NODE_DECLARE /* Specifies an offset for the shutter's time interval. 
*/ enum MotionPosition { diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 9fa51c51f52..d917057ed91 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -53,7 +53,7 @@ public: class Film : public Node { public: - NODE_DECLARE; + NODE_DECLARE float exposure; array<Pass> passes; diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index 780fdf49ca4..06524d3fa13 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -201,14 +201,14 @@ public: /* Node definition utility macros */ #define SHADER_NODE_CLASS(type) \ - NODE_DECLARE; \ + NODE_DECLARE \ type(); \ virtual ShaderNode *clone() const { return new type(*this); } \ virtual void compile(SVMCompiler& compiler); \ virtual void compile(OSLCompiler& compiler); \ #define SHADER_NODE_NO_CLONE_CLASS(type) \ - NODE_DECLARE; \ + NODE_DECLARE \ type(); \ virtual void compile(SVMCompiler& compiler); \ virtual void compile(OSLCompiler& compiler); \ diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index fd8a1262208..8985431b68a 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -285,9 +285,8 @@ int ImageManager::add_image(const string& filename, thread_scoped_lock device_lock(device_mutex); - /* Do we have a float? */ - if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) - is_float = true; + /* Check whether it's a float texture. 
*/ + is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4); /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */ if((type == IMAGE_DATA_TYPE_FLOAT || diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 27fff4831e5..3ce41d5a185 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -29,7 +29,7 @@ class Scene; class Integrator : public Node { public: - NODE_DECLARE; + NODE_DECLARE int min_bounce; int max_bounce; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 6a4557506c3..fc6790dc022 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -57,9 +57,9 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_WRITE_ONLY); + device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY); DeviceTask main_task(DeviceTask::SHADER); main_task.shader_input = d_input.device_pointer; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index c42b32919d4..b7660297f3e 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -1873,9 +1873,14 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * dscene->prim_object.reference((uint*)&pack.prim_object[0], pack.prim_object.size()); device->tex_alloc("__prim_object", dscene->prim_object); } + if(pack.prim_time.size()) { + dscene->prim_time.reference((float2*)&pack.prim_time[0], pack.prim_time.size()); + device->tex_alloc("__prim_time", dscene->prim_time); + } dscene->data.bvh.root = pack.root_index; dscene->data.bvh.use_qbvh = scene->params.use_qbvh; + 
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0); } void MeshManager::device_update_flags(Device * /*device*/, @@ -2152,6 +2157,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) device->tex_free(dscene->prim_visibility); device->tex_free(dscene->prim_index); device->tex_free(dscene->prim_object); + device->tex_free(dscene->prim_time); device->tex_free(dscene->tri_shader); device->tex_free(dscene->tri_vnormal); device->tex_free(dscene->tri_vindex); @@ -2173,6 +2179,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) dscene->prim_visibility.clear(); dscene->prim_index.clear(); dscene->prim_object.clear(); + dscene->prim_time.clear(); dscene->tri_shader.clear(); dscene->tri_vnormal.clear(); dscene->tri_vindex.clear(); diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index 5f33e30eac2..1f8b880c161 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -48,7 +48,7 @@ struct PackedPatchTable; class Mesh : public Node { public: - NODE_DECLARE; + NODE_DECLARE /* Mesh Triangle */ struct Triangle { diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index adc5b820298..4acb7911560 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -121,9 +121,9 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me /* needs to be up to data for attribute access */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_alloc("displace_input", d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); - device->mem_alloc(d_output, MEM_WRITE_ONLY); + device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY); DeviceTask task(DeviceTask::SHADER); task.shader_input = d_input.device_pointer; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 13b149eddfa..7052c03ed94 
100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -27,6 +27,7 @@ #include "util_sky_model.h" #include "util_foreach.h" +#include "util_logging.h" #include "util_transform.h" CCL_NAMESPACE_BEGIN @@ -1931,21 +1932,38 @@ GlossyBsdfNode::GlossyBsdfNode() void GlossyBsdfNode::simplify_settings(Scene *scene) { if(distribution_orig == NBUILTIN_CLOSURES) { + roughness_orig = roughness; distribution_orig = distribution; } + else { + /* By default we use original values, so we don't worry about restoring + * defaults later on and can only do override when needed. + */ + roughness = roughness_orig; + distribution = distribution_orig; + } Integrator *integrator = scene->integrator; + ShaderInput *roughness_input = input("Roughness"); if(integrator->filter_glossy == 0.0f) { /* Fallback to Sharp closure for Roughness close to 0. * Note: Keep the epsilon in sync with kernel! */ - ShaderInput *roughness_input = input("Roughness"); if(!roughness_input->link && roughness <= 1e-4f) { + VLOG(1) << "Using sharp glossy BSDF."; distribution = CLOSURE_BSDF_REFLECTION_ID; } } else { - /* Rollback to original distribution when filter glossy is used. */ - distribution = distribution_orig; + /* If filter glossy is used we replace Sharp glossy with GGX so we can + * benefit from closure blur to remove unwanted noise. 
+ */ + if(roughness_input->link == NULL && + distribution == CLOSURE_BSDF_REFLECTION_ID) + { + VLOG(1) << "Using GGX glossy with filter glossy."; + distribution = CLOSURE_BSDF_MICROFACET_GGX_ID; + roughness = 0.0f; + } } closure = distribution; } @@ -1953,7 +1971,8 @@ void GlossyBsdfNode::simplify_settings(Scene *scene) bool GlossyBsdfNode::has_integrator_dependency() { ShaderInput *roughness_input = input("Roughness"); - return !roughness_input->link && roughness <= 1e-4f; + return !roughness_input->link && + (distribution == CLOSURE_BSDF_REFLECTION_ID || roughness <= 1e-4f); } void GlossyBsdfNode::compile(SVMCompiler& compiler) @@ -2008,21 +2027,38 @@ GlassBsdfNode::GlassBsdfNode() void GlassBsdfNode::simplify_settings(Scene *scene) { if(distribution_orig == NBUILTIN_CLOSURES) { + roughness_orig = roughness; distribution_orig = distribution; } + else { + /* By default we use original values, so we don't worry about restoring + * defaults later on and can only do override when needed. + */ + roughness = roughness_orig; + distribution = distribution_orig; + } Integrator *integrator = scene->integrator; + ShaderInput *roughness_input = input("Roughness"); if(integrator->filter_glossy == 0.0f) { /* Fallback to Sharp closure for Roughness close to 0. * Note: Keep the epsilon in sync with kernel! */ - ShaderInput *roughness_input = input("Roughness"); if(!roughness_input->link && roughness <= 1e-4f) { + VLOG(1) << "Using sharp glass BSDF."; distribution = CLOSURE_BSDF_SHARP_GLASS_ID; } } else { - /* Rollback to original distribution when filter glossy is used. */ - distribution = distribution_orig; + /* If filter glossy is used we replace Sharp glass with GGX so we can + * benefit from closure blur to remove unwanted noise. 
+ */ + if(roughness_input->link == NULL && + distribution == CLOSURE_BSDF_SHARP_GLASS_ID) + { + VLOG(1) << "Using GGX glass with filter glossy."; + distribution = CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID; + roughness = 0.0f; + } } closure = distribution; } @@ -2030,7 +2066,8 @@ void GlassBsdfNode::simplify_settings(Scene *scene) bool GlassBsdfNode::has_integrator_dependency() { ShaderInput *roughness_input = input("Roughness"); - return !roughness_input->link && roughness <= 1e-4f; + return !roughness_input->link && + (distribution == CLOSURE_BSDF_SHARP_GLASS_ID || roughness <= 1e-4f); } void GlassBsdfNode::compile(SVMCompiler& compiler) @@ -2085,21 +2122,38 @@ RefractionBsdfNode::RefractionBsdfNode() void RefractionBsdfNode::simplify_settings(Scene *scene) { if(distribution_orig == NBUILTIN_CLOSURES) { + roughness_orig = roughness; distribution_orig = distribution; } + else { + /* By default we use original values, so we don't worry about restoring + * defaults later on and can only do override when needed. + */ + roughness = roughness_orig; + distribution = distribution_orig; + } Integrator *integrator = scene->integrator; + ShaderInput *roughness_input = input("Roughness"); if(integrator->filter_glossy == 0.0f) { /* Fallback to Sharp closure for Roughness close to 0. * Note: Keep the epsilon in sync with kernel! */ - ShaderInput *roughness_input = input("Roughness"); if(!roughness_input->link && roughness <= 1e-4f) { + VLOG(1) << "Using sharp refraction BSDF."; distribution = CLOSURE_BSDF_REFRACTION_ID; } } else { - /* Rollback to original distribution when filter glossy is used. */ - distribution = distribution_orig; + /* If filter glossy is used we replace Sharp refraction with GGX so we can + * benefit from closure blur to remove unwanted noise. 
+ */ + if(roughness_input->link == NULL && + distribution == CLOSURE_BSDF_REFRACTION_ID) + { + VLOG(1) << "Using GGX refraction with filter glossy."; + distribution = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + roughness = 0.0f; + } } closure = distribution; } @@ -2107,7 +2161,8 @@ void RefractionBsdfNode::simplify_settings(Scene *scene) bool RefractionBsdfNode::has_integrator_dependency() { ShaderInput *roughness_input = input("Roughness"); - return !roughness_input->link && roughness <= 1e-4f; + return !roughness_input->link && + (distribution == CLOSURE_BSDF_REFRACTION_ID || roughness <= 1e-4f); } void RefractionBsdfNode::compile(SVMCompiler& compiler) diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index eb0f7977dd1..d159c801810 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -388,7 +388,7 @@ public: bool has_integrator_dependency(); ClosureType get_closure_type() { return distribution; } - float roughness; + float roughness, roughness_orig; ClosureType distribution, distribution_orig; }; @@ -400,7 +400,7 @@ public: bool has_integrator_dependency(); ClosureType get_closure_type() { return distribution; } - float roughness, IOR; + float roughness, roughness_orig, IOR; ClosureType distribution, distribution_orig; }; @@ -412,7 +412,7 @@ public: bool has_integrator_dependency(); ClosureType get_closure_type() { return distribution; } - float roughness, IOR; + float roughness, roughness_orig, IOR; ClosureType distribution, distribution_orig; }; diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 7e306fab2a8..3495849d149 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -40,7 +40,7 @@ struct Transform; class Object : public Node { public: - NODE_DECLARE; + NODE_DECLARE Mesh *mesh; Transform tfm; diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 8768682043f..9f398c444f4 100644 --- a/intern/cycles/render/scene.h +++ 
b/intern/cycles/render/scene.h @@ -69,6 +69,7 @@ public: device_vector<uint> prim_visibility; device_vector<uint> prim_index; device_vector<uint> prim_object; + device_vector<float2> prim_time; /* mesh */ device_vector<uint> tri_shader; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 7c01934cfd8..0c7bd271371 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -230,7 +230,9 @@ void Session::run_gpu() while(1) { scoped_timer pause_timer; pause_cond.wait(pause_lock); - progress.add_skip_time(pause_timer, params.background); + if(pause) { + progress.add_skip_time(pause_timer, params.background); + } update_status_time(pause, no_tiles); progress.set_update(); @@ -520,7 +522,9 @@ void Session::run_cpu() while(1) { scoped_timer pause_timer; pause_cond.wait(pause_lock); - progress.add_skip_time(pause_timer, params.background); + if(pause) { + progress.add_skip_time(pause_timer, params.background); + } update_status_time(pause, no_tiles); progress.set_update(); @@ -650,6 +654,8 @@ void Session::load_kernels() if(!kernels_loaded) { progress.set_status("Loading render kernels (may take a few minutes the first time)"); + scoped_timer timer; + DeviceRequestedFeatures requested_features = get_requested_device_features(); VLOG(2) << "Requested features:\n" << requested_features; if(!device->load_kernels(requested_features)) { @@ -663,6 +669,9 @@ void Session::load_kernels() return; } + progress.add_skip_time(timer, false); + VLOG(1) << "Total time spent loading kernels: " << time_dt() - timer.get_start(); + kernels_loaded = true; } } @@ -883,6 +892,7 @@ void Session::path_trace() task.need_finish_queue = params.progressive_refine; task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; task.requested_tile_size = params.tile_size; + task.passes_size = tile_manager.params.get_passes_size(); device->task_add(task); } diff --git a/intern/cycles/render/shader.h 
b/intern/cycles/render/shader.h index 7d896652196..490c3f1c95d 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -82,7 +82,7 @@ enum DisplacementMethod { class Shader : public Node { public: - NODE_DECLARE; + NODE_DECLARE int pass_id; diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index 433e41fbbb6..6c52117ef9a 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -32,6 +32,13 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) } } +#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) + +#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) + +#define CCL_LOCAL_MEM_FENCE 0 +#define ccl_barrier(flags) (void)0 + #else /* __KERNEL_GPU__ */ #ifdef __KERNEL_OPENCL__ @@ -39,7 +46,7 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) /* Float atomics implementation credits: * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html */ -ccl_device_inline void atomic_add_and_fetch_float(volatile ccl_global float *source, +ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *source, const float operand) { union { @@ -56,10 +63,29 @@ ccl_device_inline void atomic_add_and_fetch_float(volatile ccl_global float *sou } while(atomic_cmpxchg((volatile ccl_global unsigned int *)source, prev_value.int_value, new_value.int_value) != prev_value.int_value); + return new_value.float_value; } +#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) +#define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) + +#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE +#define ccl_barrier(flags) barrier(flags) + #endif /* __KERNEL_OPENCL__ */ +#ifdef __KERNEL_CUDA__ + +#define atomic_add_and_fetch_float(p, x) (atomicAdd((float*)(p), (float)(x)) + (float)(x)) + +#define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int*)(p), (unsigned int)(x)) 
+#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) + +#define CCL_LOCAL_MEM_FENCE +#define ccl_barrier(flags) __syncthreads() + +#endif /* __KERNEL_CUDA__ */ + #endif /* __KERNEL_GPU__ */ #endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp index 80d177d2cae..f12c5e28c80 100644 --- a/intern/cycles/util/util_debug.cpp +++ b/intern/cycles/util/util_debug.cpp @@ -29,7 +29,8 @@ DebugFlags::CPU::CPU() sse41(true), sse3(true), sse2(true), - qbvh(true) + qbvh(true), + split_kernel(false) { reset(); } @@ -55,10 +56,12 @@ void DebugFlags::CPU::reset() #undef CHECK_CPU_FLAGS qbvh = true; + split_kernel = false; } DebugFlags::CUDA::CUDA() - : adaptive_compile(false) + : adaptive_compile(false), + split_kernel(false) { reset(); } @@ -67,6 +70,8 @@ void DebugFlags::CUDA::reset() { if(getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) adaptive_compile = true; + + split_kernel = false; } DebugFlags::OpenCL::OpenCL() @@ -133,7 +138,9 @@ std::ostream& operator <<(std::ostream &os, << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" - << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n"; + << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" + << " QBVH : " << string_from_bool(debug_flags.cpu.qbvh) << "\n" + << " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n"; os << "CUDA flags:\n" << " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index 73fd228b5d9..911c95de4ab 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -46,6 +46,9 @@ public: /* Whether QBVH usage is allowed or not. 
*/ bool qbvh; + + /* Whether split kernel is used */ + bool split_kernel; }; /* Descriptor of CUDA feature-set to be used. */ @@ -58,6 +61,9 @@ public: /* Whether adaptive feature based runtime compile is enabled or not. * Requires the CUDA Toolkit and only works on Linux atm. */ bool adaptive_compile; + + /* Whether split kernel is used */ + bool split_kernel; }; /* Descriptor of OpenCL feature-set to be used. */ diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index 5db3384cda4..c1a47d58c55 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -18,6 +18,7 @@ #define __UTIL_HALF_H__ #include "util_types.h" +#include "util_math.h" #ifdef __KERNEL_SSE2__ #include "util_simd.h" @@ -110,6 +111,28 @@ ccl_device_inline float4 half4_to_float4(half4 h) return f; } +ccl_device_inline half float_to_half(float f) +{ + const uint u = __float_as_uint(f); + /* Sign bit, shifted to its position. */ + uint sign_bit = u & 0x80000000; + sign_bit >>= 16; + /* Exponent. */ + uint exponent_bits = u & 0x7f800000; + /* Non-sign bits. */ + uint value_bits = u & 0x7fffffff; + value_bits >>= 13; /* Align mantissa on MSB. */ + value_bits -= 0x1c000; /* Adjust bias. */ + /* Flush-to-zero. */ + value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; + /* Clamp-to-max. */ + value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; + /* Denormals-as-zero. */ + value_bits = (exponent_bits == 0 ? 0 : value_bits); + /* Re-insert sign bit and return. 
*/ + return (value_bits | sign_bit); +} + #endif #endif diff --git a/intern/cycles/util/util_image_impl.h b/intern/cycles/util/util_image_impl.h index 73ecfda0855..4daf1eaac22 100644 --- a/intern/cycles/util/util_image_impl.h +++ b/intern/cycles/util/util_image_impl.h @@ -19,6 +19,7 @@ #include "util_algorithm.h" #include "util_debug.h" +#include "util_half.h" #include "util_image.h" CCL_NAMESPACE_BEGIN @@ -38,6 +39,52 @@ const T *util_image_read(const vector<T>& pixels, return &pixels[index]; } +/* Cast input pixel from unknown storage to float. */ +template<typename T> +inline float cast_to_float(T value); + +template<> +inline float cast_to_float(float value) +{ + return value; +} +template<> +inline float cast_to_float(uchar value) +{ + return (float)value / 255.0f; +} +template<> +inline float cast_to_float(half value) +{ + return half_to_float(value); +} + +/* Cast float value to output pixel type. */ +template<typename T> +inline T cast_from_float(float value); + +template<> +inline float cast_from_float(float value) +{ + return value; +} +template<> +inline uchar cast_from_float(float value) +{ + if(value < 0.0f) { + return 0; + } + else if(value > (1.0f - 0.5f / 255.0f)) { + return 255; + } + return (uchar)((255.0f * value) + 0.5f); +} +template<> +inline half cast_from_float(float value) +{ + return float_to_half(value); +} + template<typename T> void util_image_downscale_sample(const vector<T>& pixels, const size_t width, @@ -71,15 +118,22 @@ void util_image_downscale_sample(const vector<T>& pixels, components, nx, ny, nz); for(size_t k = 0; k < components; ++k) { - accum[k] += pixel[k]; + accum[k] += cast_to_float(pixel[k]); } ++count; } } } - const float inv_count = 1.0f / (float)count; - for(size_t k = 0; k < components; ++k) { - result[k] = T(accum[k] * inv_count); + if(count != 0) { + const float inv_count = 1.0f / (float)count; + for(size_t k = 0; k < components; ++k) { + result[k] = cast_from_float<T>(accum[k] * inv_count); + } + } + else { + 
for(size_t k = 0; k < components; ++k) { + result[k] = T(0.0f); + } } } diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp index 03041723e15..6824f1ff83c 100644 --- a/intern/cycles/util/util_logging.cpp +++ b/intern/cycles/util/util_logging.cpp @@ -69,6 +69,15 @@ void util_logging_verbosity_set(int verbosity) } std::ostream& operator <<(std::ostream &os, + const int2 &value) +{ + os << "(" << value.x + << ", " << value.y + << ")"; + return os; +} + +std::ostream& operator <<(std::ostream &os, const float3 &value) { os << "(" << value.x diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h index 2aa9c25b1a0..ecf9c9cfee0 100644 --- a/intern/cycles/util/util_logging.h +++ b/intern/cycles/util/util_logging.h @@ -45,6 +45,7 @@ public: #define VLOG_ONCE(level, flag) if(!flag) flag = true, VLOG(level) +struct int2; struct float3; void util_logging_init(const char *argv0); @@ -52,6 +53,8 @@ void util_logging_start(void); void util_logging_verbosity_set(int verbosity); std::ostream& operator <<(std::ostream &os, + const int2 &value); +std::ostream& operator <<(std::ostream &os, const float3 &value); CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 2b81c8c498a..ae4b3d77f12 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1329,7 +1329,7 @@ ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; - /* try to get grey even if b is zero */ + /* try to get gray even if b is zero */ if(b.x == 0.0f) { if(b.y == 0.0f) { x = z; diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 5df262fcbbb..1b2e8aace5b 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -814,7 +814,7 @@ string path_source_replace_includes(const string& source, /* Use line directives for better error messages. 
*/ line = line_directive(filepath, 1) + token.replace(0, n_end + 1, "\n" + text + "\n") - + line_directive(path_join(path, source_filename), i); + + line_directive(path_join(path, source_filename), i + 1); } } } diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index 2f5295b5463..cf99a08efae 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -514,12 +514,12 @@ ccl_device_inline float len3(const ssef& a) /* faster version for SSSE3 */ typedef ssei shuffle_swap_t; -ccl_device_inline const shuffle_swap_t shuffle_swap_identity(void) +ccl_device_inline shuffle_swap_t shuffle_swap_identity(void) { return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } -ccl_device_inline const shuffle_swap_t shuffle_swap_swap(void) +ccl_device_inline shuffle_swap_t shuffle_swap_swap(void) { return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } @@ -534,12 +534,12 @@ ccl_device_inline const ssef shuffle_swap(const ssef& a, const shuffle_swap_t& s /* somewhat slower version for SSE2 */ typedef int shuffle_swap_t; -ccl_device_inline const shuffle_swap_t shuffle_swap_identity(void) +ccl_device_inline shuffle_swap_t shuffle_swap_identity(void) { return 0; } -ccl_device_inline const shuffle_swap_t shuffle_swap_swap(void) +ccl_device_inline shuffle_swap_t shuffle_swap_swap(void) { return 1; } diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h index 033d85e8ec6..e90049254de 100644 --- a/intern/cycles/util/util_static_assert.h +++ b/intern/cycles/util/util_static_assert.h @@ -43,7 +43,9 @@ template <> class StaticAssertFailure<true> {}; # endif /* __COUNTER__ */ # endif /* C++11 or MSVC2015 */ #else /* __KERNEL_GPU__ */ -# define static_assert(statement, message) +# ifndef static_assert +# define static_assert(statement, message) +# endif #endif /* __KERNEL_GPU__ */ /* TODO(sergey): For until C++11 is a bare minimum for us, diff --git 
a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index a000fae4bd6..36d2f1053c7 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -37,6 +37,9 @@ #define ccl_device_noinline static #define ccl_global #define ccl_constant +#define ccl_local +#define ccl_local_param +#define ccl_private #define ccl_restrict __restrict #define __KERNEL_WITH_SSE_ALIGN__ @@ -397,11 +400,6 @@ ccl_device_inline float4 make_float4(float x, float y, float z, float w) return a; } -ccl_device_inline int align_up(int offset, int alignment) -{ - return (offset + alignment - 1) & ~(alignment - 1); -} - ccl_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ @@ -476,6 +474,21 @@ ccl_device_inline int4 make_int4(const float3& f) #endif +ccl_device_inline int align_up(int offset, int alignment) +{ + return (offset + alignment - 1) & ~(alignment - 1); +} + +ccl_device_inline int round_up(int x, int multiple) +{ + return ((x + multiple - 1) / multiple) * multiple; +} + +ccl_device_inline int round_down(int x, int multiple) +{ + return (x / multiple) * multiple; +} + /* Interpolation types for textures * cuda also use texture space to store other objects */ enum InterpolationType { diff --git a/intern/ffmpeg/ffmpeg_compat.h b/intern/ffmpeg/ffmpeg_compat.h index bcfa24b06a8..d6220ebf562 100644 --- a/intern/ffmpeg/ffmpeg_compat.h +++ b/intern/ffmpeg/ffmpeg_compat.h @@ -350,7 +350,12 @@ int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture, FFMPEG_INLINE int64_t av_get_pts_from_frame(AVFormatContext *avctx, AVFrame * picture) { - int64_t pts = picture->pkt_pts; + int64_t pts; +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(55, 34, 100) + pts = picture->pts; +#else + pts = picture->pkt_pts; +#endif if (pts == AV_NOPTS_VALUE) { pts = picture->pkt_dts; diff --git a/intern/libmv/libmv/numeric/numeric.h b/intern/libmv/libmv/numeric/numeric.h index a42dab8c7a2..1a23d653676 100644 --- a/intern/libmv/libmv/numeric/numeric.h +++ 
b/intern/libmv/libmv/numeric/numeric.h @@ -36,7 +36,7 @@ #if !defined(__MINGW64__) # if defined(_WIN32) || defined(__APPLE__) || \ defined(__FreeBSD__) || defined(__NetBSD__) -static void sincos(double x, double *sinx, double *cosx) { +inline void sincos(double x, double *sinx, double *cosx) { *sinx = sin(x); *cosx = cos(x); } @@ -5,8 +5,8 @@ REM This is for users who like to configure & build Blender with a single comman setlocal ENABLEEXTENSIONS set BLENDER_DIR=%~dp0 set BLENDER_DIR_NOSPACES=%BLENDER_DIR: =% -if not "%BLENDER_DIR%"=="%BLENDER_DIR_NOSPACES%" ( - echo There are spaces detected in the build path "%BLENDER_DIR%", this is currently not supported, exiting.... +if not "%BLENDER_DIR%"=="%BLENDER_DIR_NOSPACES%" ( + echo There are spaces detected in the build path "%BLENDER_DIR%", this is currently not supported, exiting.... goto EOF ) set BUILD_DIR=%BLENDER_DIR%..\build_windows @@ -79,7 +79,7 @@ if NOT "%1" == "" ( set NOBUILD=1 ) else if "%1" == "showhash" ( for /f "delims=" %%i in ('git rev-parse HEAD') do echo Branch_hash=%%i - cd release/datafiles/locale + cd release/datafiles/locale for /f "delims=" %%i in ('git rev-parse HEAD') do echo Locale_hash=%%i cd %~dp0 cd release/scripts/addons @@ -132,13 +132,13 @@ if "%BUILD_ARCH%"=="x64" ( if "%target%"=="Release" ( - rem for vc12 check for both cuda 7.5 and 8 + rem for vc12 check for both cuda 7.5 and 8 if "%CUDA_PATH%"=="" ( echo Cuda Not found, aborting! goto EOF ) set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^ - -C"%BLENDER_DIR%\build_files\cmake\config\blender_release.cmake" + -C"%BLENDER_DIR%\build_files\cmake\config\blender_release.cmake" ) :DetectMSVC @@ -157,7 +157,7 @@ if DEFINED MSVC_VC_DIR goto msvc_detect_finally if DEFINED MSVC_VC_DIR call "%MSVC_VC_DIR%\vcvarsall.bat" if DEFINED MSVC_VC_DIR goto sanity_checks -rem MSVC Build environment 2017 and up. +rem MSVC Build environment 2017 and up. 
for /F "usebackq skip=2 tokens=1-2*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VisualStudio\SXS\VS7" /v %BUILD_VS_VER%.0 2^>nul`) DO set MSVC_VS_DIR=%%C if DEFINED MSVC_VS_DIR goto msvc_detect_finally_2017 REM Check 32 bits @@ -202,7 +202,7 @@ if NOT EXIST %BLENDER_DIR%..\lib\nul ( if "%TARGET%"=="" ( echo Error: Convenience target not set echo This is required for building, aborting! - echo . + echo . goto HELP ) @@ -266,15 +266,15 @@ echo. echo At any point you can optionally modify your build configuration by editing: echo "%BUILD_DIR%\CMakeCache.txt", then run "make" again to build with the changes applied. echo. -echo Blender successfully built, run from: "%BUILD_DIR%\bin\%BUILD_TYPE%" +echo Blender successfully built, run from: "%BUILD_DIR%\bin\%BUILD_TYPE%\blender.exe" echo. goto EOF :HELP echo. echo Convenience targets - echo - release ^(identical to the offical blender.org builds^) + echo - release ^(identical to the official blender.org builds^) echo - full ^(same as release minus the cuda kernels^) - echo - lite + echo - lite echo - headless echo - cycles echo - bpy @@ -289,11 +289,10 @@ goto EOF echo - with_tests ^(enable building unit tests^) echo - debug ^(Build an unoptimized debuggable build^) echo - packagename [newname] ^(override default cpack package name^) - echo - x86 ^(override host autodetect and build 32 bit code^) - echo - x64 ^(override host autodetect and build 64 bit code^) + echo - x86 ^(override host auto-detect and build 32 bit code^) + echo - x64 ^(override host auto-detect and build 64 bit code^) echo - 2013 ^(build with visual studio 2013^) echo - 2015 ^(build with visual studio 2015^) [EXPERIMENTAL] echo. 
:EOF - diff --git a/release/scripts/freestyle/modules/freestyle/shaders.py b/release/scripts/freestyle/modules/freestyle/shaders.py index 633def38b5b..bce6642220b 100644 --- a/release/scripts/freestyle/modules/freestyle/shaders.py +++ b/release/scripts/freestyle/modules/freestyle/shaders.py @@ -568,7 +568,7 @@ class pyRandomColorShader(StrokeShader): class py2DCurvatureColorShader(StrokeShader): """ - Assigns a color (greyscale) to the stroke based on the curvature. + Assigns a color (grayscale) to the stroke based on the curvature. A higher curvature will yield a brighter color. """ def shade(self, stroke): @@ -584,7 +584,7 @@ class py2DCurvatureColorShader(StrokeShader): class pyTimeColorShader(StrokeShader): """ - Assigns a greyscale value that increases for every vertex. + Assigns a grayscale value that increases for every vertex. The brightness will increase along the stroke. """ def __init__(self, step=0.01): diff --git a/release/scripts/freestyle/modules/parameter_editor.py b/release/scripts/freestyle/modules/parameter_editor.py index 93305cb7c5a..b093920a4cb 100644 --- a/release/scripts/freestyle/modules/parameter_editor.py +++ b/release/scripts/freestyle/modules/parameter_editor.py @@ -1170,6 +1170,7 @@ class Seed: _seed = Seed() + def get_dashed_pattern(linestyle): """Extracts the dashed pattern from the various UI options """ pattern = [] @@ -1185,6 +1186,15 @@ def get_dashed_pattern(linestyle): return pattern +def get_grouped_objects(group): + for ob in group.objects: + if ob.dupli_type == 'GROUP' and ob.dupli_group is not None: + for dupli in get_grouped_objects(ob.dupli_group): + yield dupli + else: + yield ob + + integration_types = { 'MEAN': IntegrationType.MEAN, 'MIN': IntegrationType.MIN, @@ -1267,7 +1277,7 @@ def process(layer_name, lineset_name): # prepare selection criteria by group of objects if lineset.select_by_group: if lineset.group is not None: - names = {getQualifiedObjectName(ob): True for ob in lineset.group.objects} + names = 
{getQualifiedObjectName(ob): True for ob in get_grouped_objects(lineset.group)} upred = ObjectNamesUP1D(names, lineset.group_negation == 'EXCLUSIVE') selection_criteria.append(upred) # prepare selection criteria by image border diff --git a/release/scripts/modules/addon_utils.py b/release/scripts/modules/addon_utils.py index 0f096f5812c..886f078f046 100644 --- a/release/scripts/modules/addon_utils.py +++ b/release/scripts/modules/addon_utils.py @@ -31,8 +31,9 @@ __all__ = ( import bpy as _bpy _user_preferences = _bpy.context.user_preferences -error_duplicates = False error_encoding = False +# (name, file, path) +error_duplicates = [] addons_fake_modules = {} @@ -57,12 +58,11 @@ def paths(): def modules_refresh(module_cache=addons_fake_modules): - global error_duplicates global error_encoding import os - error_duplicates = False error_encoding = False + error_duplicates.clear() path_list = paths() @@ -168,7 +168,7 @@ def modules_refresh(module_cache=addons_fake_modules): if mod.__file__ != mod_path: print("multiple addons with the same name:\n %r\n %r" % (mod.__file__, mod_path)) - error_duplicates = True + error_duplicates.append((mod.bl_info["name"], mod.__file__, mod_path)) elif mod.__time__ != os.path.getmtime(mod_path): print("reloading addon:", diff --git a/release/scripts/presets/interface_theme/back_to_black.xml b/release/scripts/presets/interface_theme/back_to_black.xml index 915e9cb64f1..1636f5b5cf6 100644 --- a/release/scripts/presets/interface_theme/back_to_black.xml +++ b/release/scripts/presets/interface_theme/back_to_black.xml @@ -18,7 +18,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_regular> <wcol_tool> @@ -30,19 +30,19 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_tool> <wcol_radio> <ThemeWidgetColors outline="#2a2a2a" inner="#111111ff" inner_sel="#33406bff" - item="#191919ff" + item="#444444ff" 
text="#929292" text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_radio> <wcol_text> @@ -50,23 +50,23 @@ inner="#111111ff" inner_sel="#33406bff" item="#191919ff" - text="#e4e4e4" + text="#929292" text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_text> <wcol_option> - <ThemeWidgetColors outline="#2a2a2a" + <ThemeWidgetColors outline="#535353" inner="#111111ff" inner_sel="#33406bff" - item="#000000ff" - text="#c7c7c7" + item="#a3a3a3ff" + text="#929292" text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_option> <wcol_toggle> @@ -78,7 +78,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_toggle> <wcol_num> @@ -90,7 +90,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_num> <wcol_numslider> @@ -102,7 +102,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_numslider> <wcol_box> @@ -114,7 +114,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_box> <wcol_menu> @@ -126,7 +126,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_menu> <wcol_pulldown> @@ -138,7 +138,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_pulldown> <wcol_menu_back> @@ -150,7 +150,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_menu_back> <wcol_pie_menu> @@ -170,7 +170,7 @@ inner="#191919e6" inner_sel="#2d2d2de6" item="#646464ff" - text="#ffffff" + text="#929292" text_sel="#ffffff" show_shaded="FALSE" shadetop="25" @@ 
-186,7 +186,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_menu_item> <wcol_scroll> @@ -198,7 +198,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_scroll> <wcol_progress> @@ -210,7 +210,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_progress> <wcol_list_item> @@ -222,7 +222,7 @@ text_sel="#ffffff" show_shaded="TRUE" shadetop="-100" - shadedown="0"> + shadedown="5"> </ThemeWidgetColors> </wcol_list_item> <wcol_state> @@ -239,32 +239,35 @@ </user_interface> <view_3d> <ThemeView3D grid="#222222" + clipping_border_3d="#313131ff" wire="#888888" - wire_edit="#000000" + wire_edit="#6c75ff" gp_vertex="#000000" gp_vertex_select="#ff8500" gp_vertex_size="3" - lamp="#c1d40028" - speaker="#535353" - camera="#000000" - view_overlay="#000000" - empty="#000000" + text_grease_pencil="#b5e61d" object_selected="#f15800" object_active="#ff8c19" object_grouped="#083008" object_grouped_active="#55bb55" - transform="#ffffff" + text_keyframe="#ddd700" + camera="#535353" + empty="#535353" + lamp="#fff0d328" + speaker="#535353" vertex="#72cfdd" vertex_select="#ff8500" vertex_size="3" + vertex_bevel="#00a5ff" vertex_unreferenced="#000000" edge_select="#ffa000" edge_seam="#db2512" edge_sharp="#ff2020" edge_crease="#cc0099" + edge_bevel="#00a5ff" edge_facesel="#6b6b6b" freestyle_edge_mark="#7fff7f" - face="#73828f12" + face="#73828f41" face_select="#ffa4003c" face_dot="#ffa900" facedot_size="4" @@ -291,19 +294,18 @@ normal="#22dddd" vertex_normal="#2361dd" split_normal="#dd23dd" - bone_solid="#c8c8c8" bone_pose="#50c8ff" bone_pose_active="#8cffff" - frame_current="#60c040" - outline_width="1" + bone_solid="#c8c8c8" bundle_solid="#c8c8c8" camera_path="#5a5a5a" skin_root="#000000" - clipping_border_3d="#313131ff" - text_keyframe="#ddd700" - text_grease_pencil="#b5e61d" + 
view_overlay="#000000" + transform="#ffffff" + frame_current="#60c040" paint_curve_handle="#7fff7f7f" - paint_curve_pivot="#ff7f7f7f"> + paint_curve_pivot="#ff7f7f7f" + outline_width="1"> <space> <ThemeSpaceGradient title="#5d5d5d" text="#7d7d7d" @@ -312,23 +314,23 @@ header_text="#979797" header_text_hi="#ffffff" button="#00000057" - button_title="#c5c5c5" + button_title="#929292" button_text="#c3c3c3" - button_text_hi="#ffffff" + button_text_hi="#e5e5e5" tab_active="#212947" tab_inactive="#000000" tab_back="#060606ff" tab_outline="#000000"> <gradients> <ThemeGradientColors show_grad="TRUE" - gradient="#0a0a0a" + gradient="#1d1d1d" high_gradient="#000000"> </ThemeGradientColors> </gradients> <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -348,6 +350,7 @@ vertex="#ffffff" vertex_select="#ff8500" vertex_size="3" + vertex_bevel="#000000" vertex_unreferenced="#000000" handle_free="#808080" handle_auto="#909000" @@ -382,7 +385,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -418,7 +421,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -463,7 +466,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -501,6 +504,7 @@ keyframe_jitter_selected="#61c042" keyframe_border="#000000ff" keyframe_border_selected="#000000ff" + keyframe_scale_factor="1" summary="#00000000"> <space> <ThemeSpaceGeneric back="#080808" @@ -521,7 +525,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ 
-543,6 +547,7 @@ vertex="#0f13bb" vertex_select="#ff8500" vertex_size="3" + vertex_bevel="#000000" vertex_unreferenced="#000000" face="#ffffff0a" face_select="#ff85003c" @@ -596,7 +601,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -644,7 +649,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -673,7 +678,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -712,7 +717,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -744,7 +749,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -799,7 +804,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -835,7 +840,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -865,7 +870,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -903,7 +908,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -932,7 +937,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> 
</ThemePanelColors> </panelcolors> @@ -966,7 +971,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> @@ -1019,7 +1024,7 @@ <panelcolors> <ThemePanelColors header="#00000019" back="#72727280" - show_header="FALSE" + show_header="TRUE" show_back="FALSE"> </ThemePanelColors> </panelcolors> diff --git a/release/scripts/presets/keyconfig/3dsmax.py b/release/scripts/presets/keyconfig/3dsmax.py index 7694e338d68..6d05ff6982c 100644 --- a/release/scripts/presets/keyconfig/3dsmax.py +++ b/release/scripts/presets/keyconfig/3dsmax.py @@ -401,6 +401,12 @@ kmi = km.keymap_items.new('particle.hide', 'H', 'PRESS') kmi.properties.unselected = False kmi = km.keymap_items.new('particle.hide', 'H', 'PRESS', shift=True) kmi.properties.unselected = True +kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', shift=True) +kmi.properties.release_confirm = True +kmi.properties.use_planar_constraint= True +kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', shift=True) +kmi.properties.release_confirm = True +kmi.properties.use_accurate= True kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', any=True) kmi.properties.release_confirm = True kmi = km.keymap_items.new('particle.brush_edit', 'LEFTMOUSE', 'PRESS') @@ -422,6 +428,12 @@ kmi.properties.value_2 = 'ENABLED' # Map 3D View km = kc.keymaps.new('3D View', space_type='VIEW_3D', region_type='WINDOW', modal=False) +kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', shift=True) +kmi.properties.release_confirm = True +kmi.properties.use_planar_constraint= True +kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', shift=True) +kmi.properties.release_confirm = True +kmi.properties.use_accurate= True kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', any=True) kmi.properties.release_confirm = True kmi = 
km.keymap_items.new('view3d.cursor3d', 'ACTIONMOUSE', 'PRESS') diff --git a/release/scripts/presets/keyconfig/maya.py b/release/scripts/presets/keyconfig/maya.py index 67fd1fddcac..53129593a59 100644 --- a/release/scripts/presets/keyconfig/maya.py +++ b/release/scripts/presets/keyconfig/maya.py @@ -933,6 +933,12 @@ km = kc.keymaps.new('3D View', space_type='VIEW_3D', region_type='WINDOW', modal kmi = km.keymap_items.new('view3d.cursor3d', 'ACTIONMOUSE', 'PRESS') kmi = km.keymap_items.new('view3d.rotate', 'LEFTMOUSE', 'PRESS', alt=True) +kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', shift=True) +kmi.properties.release_confirm = True +kmi.properties.use_planar_constraint = True +kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', shift=True) +kmi.properties.release_confirm = True +kmi.properties.use_accurate = True kmi = km.keymap_items.new('view3d.manipulator', 'LEFTMOUSE', 'PRESS', any=True) kmi.properties.release_confirm = True kmi = km.keymap_items.new('view3d.move', 'MIDDLEMOUSE', 'PRESS', alt=True) diff --git a/release/scripts/startup/bl_operators/object_quick_effects.py b/release/scripts/startup/bl_operators/object_quick_effects.py index ef10e279bb4..0b9e7fd7305 100644 --- a/release/scripts/startup/bl_operators/object_quick_effects.py +++ b/release/scripts/startup/bl_operators/object_quick_effects.py @@ -319,7 +319,7 @@ class QuickSmoke(Operator): def execute(self, context): if not bpy.app.build_options.mod_smoke: - self.report({'ERROR'}, "Build without Smoke modifier support") + self.report({'ERROR'}, "Built without Smoke modifier support") return {'CANCELLED'} fake_context = context.copy() @@ -568,7 +568,7 @@ class QuickFluid(Operator): def execute(self, context): if not bpy.app.build_options.mod_fluid: - self.report({'ERROR'}, "Build without Fluid modifier support") + self.report({'ERROR'}, "Built without Fluid modifier support") return {'CANCELLED'} fake_context = context.copy() diff --git 
a/release/scripts/startup/bl_operators/wm.py b/release/scripts/startup/bl_operators/wm.py index f5460d58d44..869070ed778 100644 --- a/release/scripts/startup/bl_operators/wm.py +++ b/release/scripts/startup/bl_operators/wm.py @@ -1008,11 +1008,9 @@ class WM_OT_doc_view(Operator): doc_id = doc_id if bpy.app.version_cycle == "release": - _prefix = ("https://www.blender.org/api/blender_python_api_%s%s_release" % - ("_".join(str(v) for v in bpy.app.version[:2]), bpy.app.version_char)) + _prefix = ("https://docs.blender.org/api/blender_python_api_current") else: - _prefix = ("https://www.blender.org/api/blender_python_api_%s" % - "_".join(str(v) for v in bpy.app.version)) + _prefix = ("https://docs.blender.org/api/blender_python_api_master") def execute(self, context): url = _wm_doc_get_id(self.doc_id, do_url=True, url_prefix=self._prefix) diff --git a/release/scripts/startup/bl_ui/properties_data_modifier.py b/release/scripts/startup/bl_ui/properties_data_modifier.py index d66fb08bcd6..68b6265ab15 100644 --- a/release/scripts/startup/bl_ui/properties_data_modifier.py +++ b/release/scripts/startup/bl_ui/properties_data_modifier.py @@ -951,6 +951,23 @@ class DATA_PT_modifiers(ModifierButtonsPanel, Panel): def SURFACE(self, layout, ob, md): layout.label(text="Settings are inside the Physics tab") + def SURFACE_DEFORM(self, layout, ob, md): + col = layout.column() + col.active = not md.is_bound + + col.prop(md, "target") + col.prop(md, "falloff") + + layout.separator() + + col = layout.column() + col.active = md.target is not None + + if md.is_bound: + col.operator("object.surfacedeform_bind", text="Unbind") + else: + col.operator("object.surfacedeform_bind", text="Bind") + def UV_PROJECT(self, layout, ob, md): split = layout.split() @@ -1320,7 +1337,9 @@ class DATA_PT_modifiers(ModifierButtonsPanel, Panel): row.prop(md, "thickness_vertex_group", text="Factor") col.prop(md, "use_crease", text="Crease Edges") - col.prop(md, "crease_weight", text="Crease Weight") + row = 
col.row() + row.active = md.use_crease + row.prop(md, "crease_weight", text="Crease Weight") col = split.column() diff --git a/release/scripts/startup/bl_ui/properties_object.py b/release/scripts/startup/bl_ui/properties_object.py index 3ff7a248c60..4a596981983 100644 --- a/release/scripts/startup/bl_ui/properties_object.py +++ b/release/scripts/startup/bl_ui/properties_object.py @@ -152,6 +152,33 @@ class OBJECT_PT_relations(ObjectButtonsPanel, Panel): sub.active = (parent is not None) +class OBJECT_PT_relations_extras(ObjectButtonsPanel, Panel): + bl_label = "Relations Extras" + bl_options = {'DEFAULT_CLOSED'} + + def draw(self, context): + layout = self.layout + + ob = context.object + + split = layout.split() + + if context.scene.render.engine != 'BLENDER_GAME': + col = split.column() + col.label(text="Tracking Axes:") + col.prop(ob, "track_axis", text="Axis") + col.prop(ob, "up_axis", text="Up Axis") + + col = split.column() + col.prop(ob, "use_slow_parent") + row = col.row() + row.active = ((ob.parent is not None) and (ob.use_slow_parent)) + row.prop(ob, "slow_parent_offset", text="Offset") + + layout.prop(ob, "use_extra_recalc_object") + layout.prop(ob, "use_extra_recalc_data") + + class GROUP_MT_specials(Menu): bl_label = "Group Specials" @@ -296,33 +323,6 @@ class OBJECT_PT_duplication(ObjectButtonsPanel, Panel): layout.prop(ob, "dupli_group", text="Group") -class OBJECT_PT_relations_extras(ObjectButtonsPanel, Panel): - bl_label = "Relations Extras" - bl_options = {'DEFAULT_CLOSED'} - - def draw(self, context): - layout = self.layout - - ob = context.object - - split = layout.split() - - if context.scene.render.engine != 'BLENDER_GAME': - col = split.column() - col.label(text="Tracking Axes:") - col.prop(ob, "track_axis", text="Axis") - col.prop(ob, "up_axis", text="Up Axis") - - col = split.column() - col.prop(ob, "use_slow_parent") - row = col.row() - row.active = ((ob.parent is not None) and (ob.use_slow_parent)) - row.prop(ob, "slow_parent_offset", 
text="Offset") - - layout.prop(ob, "use_extra_recalc_object") - layout.prop(ob, "use_extra_recalc_data") - - from bl_ui.properties_animviz import ( MotionPathButtonsPanel, OnionSkinButtonsPanel, diff --git a/release/scripts/startup/bl_ui/properties_physics_common.py b/release/scripts/startup/bl_ui/properties_physics_common.py index 277b59d187d..4478c6a4379 100644 --- a/release/scripts/startup/bl_ui/properties_physics_common.py +++ b/release/scripts/startup/bl_ui/properties_physics_common.py @@ -274,6 +274,8 @@ def basic_force_field_settings_ui(self, context, field): col.prop(field, "use_global_coords", text="Global") elif field.type == 'HARMONIC': col.prop(field, "use_multiple_springs") + if field.type == 'FORCE': + col.prop(field, "use_gravity_falloff", text="Gravitation") split = layout.split() diff --git a/release/scripts/startup/bl_ui/space_sequencer.py b/release/scripts/startup/bl_ui/space_sequencer.py index 8ab5b4724b8..67ffaa7fccf 100644 --- a/release/scripts/startup/bl_ui/space_sequencer.py +++ b/release/scripts/startup/bl_ui/space_sequencer.py @@ -668,14 +668,14 @@ class SEQUENCER_PT_effect(SequencerButtonsPanel, Panel): if (i % BT_ROW) == 1: row = col.row(align=True) - # Workaround - .active has to have a separate UI block to work + # Workaround - .enabled has to have a separate UI block to work if i == strip.multicam_source: sub = row.row(align=True) - sub.active = False + sub.enabled = False sub.operator("sequencer.cut_multicam", text="%d" % i).camera = i else: sub_1 = row.row(align=True) - sub_1.active = True + sub_1.enabled = True sub_1.operator("sequencer.cut_multicam", text="%d" % i).camera = i if strip.channel > BT_ROW and (strip_channel - 1) % BT_ROW: @@ -683,7 +683,7 @@ class SEQUENCER_PT_effect(SequencerButtonsPanel, Panel): row.label("") else: col.separator() - col.label(text="Two or more channels are needed below this strip.", icon="INFO") + col.label(text="Two or more channels are needed below this strip", icon="INFO") elif strip.type == 
'TEXT': diff --git a/release/scripts/startup/bl_ui/space_time.py b/release/scripts/startup/bl_ui/space_time.py index 508e62e4f56..1f62d8d6968 100644 --- a/release/scripts/startup/bl_ui/space_time.py +++ b/release/scripts/startup/bl_ui/space_time.py @@ -49,7 +49,10 @@ class TIME_HT_header(Header): row.prop(scene, "frame_preview_start", text="Start") row.prop(scene, "frame_preview_end", text="End") - layout.prop(scene, "frame_current", text="") + if scene.show_subframe: + layout.prop(scene, "frame_float", text="") + else: + layout.prop(scene, "frame_current", text="") layout.separator() @@ -135,6 +138,7 @@ class TIME_MT_view(Menu): layout.prop(st, "show_frame_indicator") layout.prop(scene, "show_keys_from_selected_only") + layout.prop(scene, "show_subframe") layout.separator() diff --git a/release/scripts/startup/bl_ui/space_userpref.py b/release/scripts/startup/bl_ui/space_userpref.py index 075a6f870fa..cd12255c24c 100644 --- a/release/scripts/startup/bl_ui/space_userpref.py +++ b/release/scripts/startup/bl_ui/space_userpref.py @@ -453,6 +453,7 @@ class USERPREF_PT_system(Panel): col.separator() col.label(text="Selection") col.prop(system, "select_method", text="") + col.prop(system, "use_select_pick_depth") col.separator() @@ -1243,7 +1244,7 @@ class USERPREF_MT_addons_online_resources(Menu): "wm.url_open", text="API Concepts", icon='URL', ).url = bpy.types.WM_OT_doc_view._prefix + "/info_quickstart.html" layout.operator("wm.url_open", text="Add-on Tutorial", icon='URL', - ).url = "http://www.blender.org/api/blender_python_api_current/info_tutorial_addon.html" + ).url = bpy.types.WM_OT_doc_view._prefix + "/info_tutorial_addon.html" class USERPREF_PT_addons(Panel): @@ -1317,11 +1318,18 @@ class USERPREF_PT_addons(Panel): # set in addon_utils.modules_refresh() if addon_utils.error_duplicates: - self.draw_error(col, - "Multiple addons using the same name found!\n" - "likely a problem with the script search path.\n" - "(see console for details)", - ) + box = col.box() 
+ row = box.row() + row.label("Multiple addons with the same name found!") + row.label(icon='ERROR') + box.label("Please delete one of each pair:") + for (addon_name, addon_file, addon_path) in addon_utils.error_duplicates: + box.separator() + sub_col = box.column(align=True) + sub_col.label(addon_name + ":") + sub_col.label(" " + addon_file) + sub_col.label(" " + addon_path) + if addon_utils.error_encoding: self.draw_error(col, diff --git a/source/blender/alembic/intern/abc_archive.cc b/source/blender/alembic/intern/abc_archive.cc index 0985a06d732..5f8fc1a3739 100644 --- a/source/blender/alembic/intern/abc_archive.cc +++ b/source/blender/alembic/intern/abc_archive.cc @@ -113,25 +113,25 @@ static OArchive create_archive(std::ostream *ostream, Alembic::Abc::MetaData &md, bool ogawa) { - md.set(Alembic::Abc::kApplicationNameKey, "Blender"); + md.set(Alembic::Abc::kApplicationNameKey, "Blender"); md.set(Alembic::Abc::kUserDescriptionKey, scene_name); - time_t raw_time; - time(&raw_time); - char buffer[128]; + time_t raw_time; + time(&raw_time); + char buffer[128]; #if defined _WIN32 || defined _WIN64 - ctime_s(buffer, 128, &raw_time); + ctime_s(buffer, 128, &raw_time); #else - ctime_r(&raw_time, buffer); + ctime_r(&raw_time, buffer); #endif - const std::size_t buffer_len = strlen(buffer); - if (buffer_len > 0 && buffer[buffer_len - 1] == '\n') { - buffer[buffer_len - 1] = '\0'; - } + const std::size_t buffer_len = strlen(buffer); + if (buffer_len > 0 && buffer[buffer_len - 1] == '\n') { + buffer[buffer_len - 1] = '\0'; + } - md.set(Alembic::Abc::kDateWrittenKey, buffer); + md.set(Alembic::Abc::kDateWrittenKey, buffer); ErrorHandler::Policy policy = ErrorHandler::kThrowPolicy; diff --git a/source/blender/alembic/intern/abc_curves.cc b/source/blender/alembic/intern/abc_curves.cc index 282777f3af0..0542255d84b 100644 --- a/source/blender/alembic/intern/abc_curves.cc +++ b/source/blender/alembic/intern/abc_curves.cc @@ -361,7 +361,7 @@ void read_curve_sample(Curve *cu, 
const ICurvesSchema &schema, const float time) * object directly and create a new DerivedMesh from that. Also we might need to * create new or delete existing NURBS in the curve. */ -DerivedMesh *AbcCurveReader::read_derivedmesh(DerivedMesh */*dm*/, const float time, int /*read_flag*/, const char **/*err_str*/) +DerivedMesh *AbcCurveReader::read_derivedmesh(DerivedMesh * /*dm*/, const float time, int /*read_flag*/, const char ** /*err_str*/) { ISampleSelector sample_sel(time); const ICurvesSchema::Sample sample = m_curves_schema.getValue(sample_sel); diff --git a/source/blender/alembic/intern/abc_customdata.cc b/source/blender/alembic/intern/abc_customdata.cc index ebf1b2ba96e..0d11ab79ddd 100644 --- a/source/blender/alembic/intern/abc_customdata.cc +++ b/source/blender/alembic/intern/abc_customdata.cc @@ -327,6 +327,11 @@ static void read_custom_data_ex(const ICompoundProperty &prop, } else if (data_type == CD_MLOOPUV) { IV2fGeomParam uv_param(prop, prop_header.getName()); + + if (!uv_param.isIndexed()) { + return; + } + IV2fGeomParam::Sample sample; uv_param.getIndexed(sample, iss); diff --git a/source/blender/alembic/intern/abc_exporter.cc b/source/blender/alembic/intern/abc_exporter.cc index d17506ff8b0..90a99469389 100644 --- a/source/blender/alembic/intern/abc_exporter.cc +++ b/source/blender/alembic/intern/abc_exporter.cc @@ -47,7 +47,7 @@ extern "C" { #ifdef WIN32 /* needed for MSCV because of snprintf from BLI_string */ -# include "BLI_winstuff.h" +# include "BLI_winstuff.h" #endif #include "BKE_anim.h" diff --git a/source/blender/alembic/intern/abc_mesh.cc b/source/blender/alembic/intern/abc_mesh.cc index 8bc9c335054..5a57e43326a 100644 --- a/source/blender/alembic/intern/abc_mesh.cc +++ b/source/blender/alembic/intern/abc_mesh.cc @@ -691,7 +691,7 @@ static void assign_materials(Main *bmain, Object *ob, const std::map<std::string assigned_name = mat_iter->second; } - assign_material(ob, assigned_name, it->second, BKE_MAT_ASSIGN_OBJECT); + 
assign_material(ob, assigned_name, it->second, BKE_MAT_ASSIGN_OBDATA); } } } diff --git a/source/blender/alembic/intern/abc_points.cc b/source/blender/alembic/intern/abc_points.cc index 4c78f3e83c7..fc84759b1d9 100644 --- a/source/blender/alembic/intern/abc_points.cc +++ b/source/blender/alembic/intern/abc_points.cc @@ -200,7 +200,7 @@ void read_points_sample(const IPointsSchema &schema, read_mverts(config.mvert, positions, vnormals); } -DerivedMesh *AbcPointsReader::read_derivedmesh(DerivedMesh *dm, const float time, int /*read_flag*/, const char **/*err_str*/) +DerivedMesh *AbcPointsReader::read_derivedmesh(DerivedMesh *dm, const float time, int /*read_flag*/, const char ** /*err_str*/) { ISampleSelector sample_sel(time); const IPointsSchema::Sample sample = m_schema.getValue(sample_sel); diff --git a/source/blender/alembic/intern/abc_transform.cc b/source/blender/alembic/intern/abc_transform.cc index 368a811bb2a..2c6ef09326c 100644 --- a/source/blender/alembic/intern/abc_transform.cc +++ b/source/blender/alembic/intern/abc_transform.cc @@ -122,7 +122,7 @@ Imath::Box3d AbcTransformWriter::bounds() return Imath::transform(bounds, m_matrix); } -bool AbcTransformWriter::hasAnimation(Object */*ob*/) const +bool AbcTransformWriter::hasAnimation(Object * /*ob*/) const { /* TODO(kevin): implement this. 
*/ return true; diff --git a/source/blender/alembic/intern/abc_util.cc b/source/blender/alembic/intern/abc_util.cc index 08c94f437e6..50fa43a3491 100644 --- a/source/blender/alembic/intern/abc_util.cc +++ b/source/blender/alembic/intern/abc_util.cc @@ -37,6 +37,8 @@ extern "C" { #include "DNA_object_types.h" #include "BLI_math.h" + +#include "PIL_time.h" } std::string get_id_name(Object *ob) @@ -523,3 +525,15 @@ AbcObjectReader *create_reader(const Alembic::AbcGeom::IObject &object, ImportSe return reader; } + +/* ********************** */ + +ScopeTimer::ScopeTimer(const char *message) + : m_message(message) + , m_start(PIL_check_seconds_timer()) +{} + +ScopeTimer::~ScopeTimer() +{ + fprintf(stderr, "%s: %fs\n", m_message, PIL_check_seconds_timer() - m_start); +} diff --git a/source/blender/alembic/intern/abc_util.h b/source/blender/alembic/intern/abc_util.h index a7ac9df91c7..85ba4d5c9c7 100644 --- a/source/blender/alembic/intern/abc_util.h +++ b/source/blender/alembic/intern/abc_util.h @@ -146,4 +146,23 @@ ABC_INLINE void copy_yup_from_zup(short yup[3], const short zup[3]) yup[2] = -zup[1]; } +/* *************************** */ + +#undef ABC_DEBUG_TIME + +class ScopeTimer { + const char *m_message; + double m_start; + +public: + ScopeTimer(const char *message); + ~ScopeTimer(); +}; + +#ifdef ABC_DEBUG_TIME +# define SCOPE_TIMER(message) ScopeTimer prof(message) +#else +# define SCOPE_TIMER(message) +#endif + #endif /* __ABC_UTIL_H__ */ diff --git a/source/blender/alembic/intern/alembic_capi.cc b/source/blender/alembic/intern/alembic_capi.cc index d8d017119b1..dc5146a26e0 100644 --- a/source/blender/alembic/intern/alembic_capi.cc +++ b/source/blender/alembic/intern/alembic_capi.cc @@ -542,6 +542,8 @@ ABC_INLINE bool is_mesh_and_strands(const IObject &object) static void import_startjob(void *user_data, short *stop, short *do_update, float *progress) { + SCOPE_TIMER("Alembic import, objects reading and creation"); + ImportJobData *data = static_cast<ImportJobData 
*>(user_data); data->stop = stop; @@ -677,6 +679,8 @@ static void import_startjob(void *user_data, short *stop, short *do_update, floa static void import_endjob(void *user_data) { + SCOPE_TIMER("Alembic import, cleanup"); + ImportJobData *data = static_cast<ImportJobData *>(user_data); std::vector<AbcObjectReader *>::iterator iter; diff --git a/source/blender/blenkernel/BKE_bvhutils.h b/source/blender/blenkernel/BKE_bvhutils.h index bf45a27e51c..cb72f0859d5 100644 --- a/source/blender/blenkernel/BKE_bvhutils.h +++ b/source/blender/blenkernel/BKE_bvhutils.h @@ -54,7 +54,6 @@ typedef struct BVHTreeFromEditMesh { /* default callbacks to bvh nearest and raycast */ BVHTree_NearestPointCallback nearest_callback; BVHTree_RayCastCallback raycast_callback; - BVHTree_NearestToRayCallback nearest_to_ray_callback; struct BMEditMesh *em; @@ -75,7 +74,6 @@ typedef struct BVHTreeFromMesh { /* default callbacks to bvh nearest and raycast */ BVHTree_NearestPointCallback nearest_callback; BVHTree_RayCastCallback raycast_callback; - BVHTree_NearestToRayCallback nearest_to_ray_callback; /* Vertex array, so that callbacks have instante access to data */ const struct MVert *vert; @@ -104,7 +102,7 @@ typedef struct BVHTreeFromMesh { * The tree is build in mesh space coordinates, this means special care must be made on queries * so that the coordinates and rays are first translated on the mesh local coordinates. * Reason for this is that bvh_from_mesh_* can use a cache in some cases and so it becomes possible to reuse a BVHTree. - * + * * free_bvhtree_from_mesh should be called when the tree is no longer needed. 
*/ BVHTree *bvhtree_from_editmesh_verts( @@ -118,7 +116,7 @@ BVHTree *bvhtree_from_editmesh_verts_ex( BVHTree *bvhtree_from_mesh_verts( struct BVHTreeFromMesh *data, struct DerivedMesh *mesh, float epsilon, int tree_type, int axis); BVHTree *bvhtree_from_mesh_verts_ex( - struct BVHTreeFromMesh *data, struct MVert *vert, const int numVerts, + struct BVHTreeFromMesh *data, const struct MVert *vert, const int numVerts, const bool vert_allocated, const BLI_bitmap *mask, int verts_num_active, float epsilon, int tree_type, int axis); @@ -135,8 +133,8 @@ BVHTree *bvhtree_from_mesh_edges( float epsilon, int tree_type, int axis); BVHTree *bvhtree_from_mesh_edges_ex( struct BVHTreeFromMesh *data, - struct MVert *vert, const bool vert_allocated, - struct MEdge *edge, const int edges_num, const bool edge_allocated, + const struct MVert *vert, const bool vert_allocated, + const struct MEdge *edge, const int edges_num, const bool edge_allocated, const BLI_bitmap *edges_mask, int edges_num_active, float epsilon, int tree_type, int axis); @@ -145,8 +143,8 @@ BVHTree *bvhtree_from_mesh_faces( int tree_type, int axis); BVHTree *bvhtree_from_mesh_faces_ex( struct BVHTreeFromMesh *data, - struct MVert *vert, const bool vert_allocated, - struct MFace *face, const int numFaces, const bool face_allocated, + const struct MVert *vert, const bool vert_allocated, + const struct MFace *face, const int numFaces, const bool face_allocated, const BLI_bitmap *mask, int numFaces_active, float epsilon, int tree_type, int axis); diff --git a/source/blender/blenkernel/BKE_curve.h b/source/blender/blenkernel/BKE_curve.h index 5558786d254..e111bd0e16b 100644 --- a/source/blender/blenkernel/BKE_curve.h +++ b/source/blender/blenkernel/BKE_curve.h @@ -36,6 +36,7 @@ struct BezTriple; struct Curve; struct EditNurb; +struct GHash; struct ListBase; struct Main; struct Nurb; @@ -52,6 +53,13 @@ typedef struct CurveCache { struct Path *path; } CurveCache; +/* Definitions needed for shape keys */ +typedef struct 
CVKeyIndex { + void *orig_cv; + int key_index, nu_index, pt_index, vertex_index; + bool switched; +} CVKeyIndex; + #define KNOTSU(nu) ( (nu)->orderu + (nu)->pntsu + (((nu)->flagu & CU_NURB_CYCLIC) ? ((nu)->orderu - 1) : 0) ) #define KNOTSV(nu) ( (nu)->orderv + (nu)->pntsv + (((nu)->flagv & CU_NURB_CYCLIC) ? ((nu)->orderv - 1) : 0) ) @@ -108,7 +116,8 @@ void BK_curve_nurbs_vertexCos_apply(struct ListBase *lb, float (*vertexCos)[3]); float (*BKE_curve_nurbs_keyVertexCos_get(struct ListBase *lb, float *key))[3]; void BKE_curve_nurbs_keyVertexTilts_apply(struct ListBase *lb, float *key); -void BKE_curve_editNurb_keyIndex_free(struct EditNurb *editnurb); +void BKE_curve_editNurb_keyIndex_delCV(struct GHash *keyindex, const void *cv); +void BKE_curve_editNurb_keyIndex_free(struct GHash **keyindex); void BKE_curve_editNurb_free(struct Curve *cu); struct ListBase *BKE_curve_editNurbs_get(struct Curve *cu); diff --git a/source/blender/blenkernel/BKE_mesh.h b/source/blender/blenkernel/BKE_mesh.h index d41878825bb..b83bec5a302 100644 --- a/source/blender/blenkernel/BKE_mesh.h +++ b/source/blender/blenkernel/BKE_mesh.h @@ -131,8 +131,7 @@ bool BKE_mesh_uv_cdlayer_rename(struct Mesh *me, const char *old_name, const cha float (*BKE_mesh_vertexCos_get(const struct Mesh *me, int *r_numVerts))[3]; -void BKE_mesh_calc_normals_split(struct Mesh *mesh); -void BKE_mesh_split_faces(struct Mesh *mesh); +void BKE_mesh_split_faces(struct Mesh *mesh, bool free_loop_normals); struct Mesh *BKE_mesh_new_from_object(struct Main *bmain, struct Scene *sce, struct Object *ob, int apply_modifiers, int settings, int calc_tessface, int calc_undeformed); @@ -228,6 +227,9 @@ void BKE_lnor_space_custom_normal_to_data(MLoopNorSpace *lnor_space, const float bool BKE_mesh_has_custom_loop_normals(struct Mesh *me); +void BKE_mesh_calc_normals_split(struct Mesh *mesh); +void BKE_mesh_calc_normals_split_ex(struct Mesh *mesh, struct MLoopNorSpaceArray *r_lnors_spacearr); + void BKE_mesh_normals_loop_split( 
const struct MVert *mverts, const int numVerts, struct MEdge *medges, const int numEdges, struct MLoop *mloops, float (*r_loopnors)[3], const int numLoops, diff --git a/source/blender/blenkernel/BKE_object.h b/source/blender/blenkernel/BKE_object.h index d812ab832a1..89adbc4338f 100644 --- a/source/blender/blenkernel/BKE_object.h +++ b/source/blender/blenkernel/BKE_object.h @@ -139,8 +139,6 @@ void BKE_boundbox_init_from_minmax(struct BoundBox *bb, const float min[3], cons void BKE_boundbox_calc_center_aabb(const struct BoundBox *bb, float r_cent[3]); void BKE_boundbox_calc_size_aabb(const struct BoundBox *bb, float r_size[3]); void BKE_boundbox_minmax(const struct BoundBox *bb, float obmat[4][4], float r_min[3], float r_max[3]); -struct BoundBox *BKE_boundbox_ensure_minimum_dimensions( - struct BoundBox *bb, struct BoundBox *bb_temp, const float epsilon); struct BoundBox *BKE_object_boundbox_get(struct Object *ob); void BKE_object_dimensions_get(struct Object *ob, float vec[3]); diff --git a/source/blender/blenkernel/intern/appdir.c b/source/blender/blenkernel/intern/appdir.c index b1dcc40279f..f2f0a92d8b3 100644 --- a/source/blender/blenkernel/intern/appdir.c +++ b/source/blender/blenkernel/intern/appdir.c @@ -121,7 +121,7 @@ static bool test_path(char *targetpath, const char *path_base, const char *path_ if (path_sep) BLI_join_dirfile(tmppath, sizeof(tmppath), path_base, path_sep); else BLI_strncpy(tmppath, path_base, sizeof(tmppath)); - /* rare cases folder_name is omitted (when looking for ~/.blender/2.xx dir only) */ + /* rare cases folder_name is omitted (when looking for ~/.config/blender/2.xx dir only) */ if (folder_name) BLI_make_file_string("/", targetpath, tmppath, folder_name); else @@ -755,7 +755,6 @@ static void where_is_temp(char *fullname, char *basename, const size_t maxlen, c void BKE_tempdir_init(char *userdir) { where_is_temp(btempdir_session, btempdir_base, FILE_MAX, userdir); -; } /** diff --git a/source/blender/blenkernel/intern/bvhutils.c 
b/source/blender/blenkernel/intern/bvhutils.c index d0e0c82e3be..c0e4ef37a93 100644 --- a/source/blender/blenkernel/intern/bvhutils.c +++ b/source/blender/blenkernel/intern/bvhutils.c @@ -376,45 +376,6 @@ static void mesh_edges_spherecast(void *userdata, int index, const BVHTreeRay *r } } -#define V3_MUL_ELEM(a, b) \ - (a)[0] * (b)[0], \ - (a)[1] * (b)[1], \ - (a)[2] * (b)[2] - -/* Callback to bvh tree nearest edge to ray. - * The tree must have been built using bvhtree_from_mesh_edges. - * userdata must be a BVHMeshCallbackUserdata built from the same mesh as the tree. */ -static void mesh_edges_nearest_to_ray( - void *userdata, const float ray_co[3], const float ray_dir[3], - const float scale[3], int index, BVHTreeNearest *nearest) -{ - struct BVHTreeFromMesh *data = userdata; - const MVert *vert = data->vert; - const MEdge *e = &data->edge[index]; - - const float t0[3] = {V3_MUL_ELEM(vert[e->v1].co, scale)}; - const float t1[3] = {V3_MUL_ELEM(vert[e->v2].co, scale)}; - const float origin_sc[3] = {V3_MUL_ELEM(ray_co, scale)}; - const float dir_sc[3] = {V3_MUL_ELEM(ray_dir, scale)}; - - float depth, point[3]; - const float dist_sq = dist_squared_ray_to_seg_v3(origin_sc, dir_sc, t0, t1, point, &depth); - - if (dist_sq < nearest->dist_sq) { - nearest->dist_sq = dist_sq; - nearest->index = index; - - point[0] /= scale[0]; - point[1] /= scale[1]; - point[2] /= scale[2]; - - copy_v3_v3(nearest->co, point); - sub_v3_v3v3(nearest->no, t0, t1); - } -} - -#undef V3_MUL_ELEM - /** \} */ /* @@ -459,7 +420,7 @@ static BVHTree *bvhtree_from_editmesh_verts_create_tree( static BVHTree *bvhtree_from_mesh_verts_create_tree( float epsilon, int tree_type, int axis, - MVert *vert, const int verts_num, + const MVert *vert, const int verts_num, const BLI_bitmap *verts_mask, int verts_num_active) { BLI_assert(vert != NULL); @@ -488,31 +449,23 @@ static BVHTree *bvhtree_from_mesh_verts_create_tree( static void bvhtree_from_mesh_verts_setup_data( BVHTreeFromMesh *data, BVHTree *tree, 
const bool is_cached, float epsilon, - MVert *vert, const bool vert_allocated) + const MVert *vert, const bool vert_allocated) { memset(data, 0, sizeof(*data)); - if (tree) { - data->tree = tree; - data->cached = is_cached; + data->tree = tree; + data->cached = is_cached; - /* a NULL nearest callback works fine - * remember the min distance to point is the same as the min distance to BV of point */ - data->nearest_callback = NULL; - data->raycast_callback = mesh_verts_spherecast; - data->nearest_to_ray_callback = NULL; + /* a NULL nearest callback works fine + * remember the min distance to point is the same as the min distance to BV of point */ + data->nearest_callback = NULL; + data->raycast_callback = mesh_verts_spherecast; - data->vert = vert; - data->vert_allocated = vert_allocated; - //data->face = DM_get_tessface_array(dm, &data->face_allocated); /* XXX WHY???? */ + data->vert = vert; + data->vert_allocated = vert_allocated; + //data->face = DM_get_tessface_array(dm, &data->face_allocated); /* XXX WHY???? 
*/ - data->sphere_radius = epsilon; - } - else { - if (vert_allocated) { - MEM_freeN(vert); - } - } + data->sphere_radius = epsilon; } /* Builds a bvh tree where nodes are the vertices of the given em */ @@ -531,7 +484,6 @@ BVHTree *bvhtree_from_editmesh_verts_ex( data->em = em; data->nearest_callback = NULL; data->raycast_callback = editmesh_verts_spherecast; - data->nearest_to_ray_callback = NULL; } return tree; @@ -588,11 +540,18 @@ BVHTree *bvhtree_from_mesh_verts( /* printf("BVHTree is already build, using cached tree\n"); */ } - /* Setup BVHTreeFromMesh */ - bvhtree_from_mesh_verts_setup_data( - data, tree, true, epsilon, vert, vert_allocated); - - return data->tree; + if (tree) { + /* Setup BVHTreeFromMesh */ + bvhtree_from_mesh_verts_setup_data( + data, tree, true, epsilon, vert, vert_allocated); + } + else { + if (vert_allocated) { + MEM_freeN(vert); + } + memset(data, 0, sizeof(*data)); + } + return tree; } /** @@ -602,7 +561,7 @@ BVHTree *bvhtree_from_mesh_verts( * \param verts_num_active if >= 0, number of active verts to add to BVH tree (else will be computed from mask). 
*/ BVHTree *bvhtree_from_mesh_verts_ex( - BVHTreeFromMesh *data, MVert *vert, const int verts_num, const bool vert_allocated, + BVHTreeFromMesh *data, const MVert *vert, const int verts_num, const bool vert_allocated, const BLI_bitmap *verts_mask, int verts_num_active, float epsilon, int tree_type, int axis) { @@ -613,7 +572,7 @@ BVHTree *bvhtree_from_mesh_verts_ex( bvhtree_from_mesh_verts_setup_data( data, tree, false, epsilon, vert, vert_allocated); - return data->tree; + return tree; } /** \} */ @@ -661,7 +620,7 @@ static BVHTree *bvhtree_from_editmesh_edges_create_tree( } static BVHTree *bvhtree_from_mesh_edges_create_tree( - MVert *vert, MEdge *edge, const int edge_num, + const MVert *vert, const MEdge *edge, const int edge_num, const BLI_bitmap *edges_mask, int edges_num_active, float epsilon, int tree_type, int axis) { @@ -694,34 +653,26 @@ static BVHTree *bvhtree_from_mesh_edges_create_tree( } static void bvhtree_from_mesh_edges_setup_data( - BVHTreeFromMesh *data, BVHTree *tree, const bool is_cached, float epsilon, - MVert *vert, const bool vert_allocated, MEdge *edge, const bool edge_allocated) + BVHTreeFromMesh *data, BVHTree *tree, + const bool is_cached, float epsilon, + const MVert *vert, const bool vert_allocated, + const MEdge *edge, const bool edge_allocated) { memset(data, 0, sizeof(*data)); + data->tree = tree; - if (data->tree) { - data->cached = is_cached; + data->cached = is_cached; - data->nearest_callback = mesh_edges_nearest_point; - data->raycast_callback = mesh_edges_spherecast; - data->nearest_to_ray_callback = mesh_edges_nearest_to_ray; + data->nearest_callback = mesh_edges_nearest_point; + data->raycast_callback = mesh_edges_spherecast; - data->vert = vert; - data->vert_allocated = vert_allocated; - data->edge = edge; - data->edge_allocated = edge_allocated; + data->vert = vert; + data->vert_allocated = vert_allocated; + data->edge = edge; + data->edge_allocated = edge_allocated; - data->sphere_radius = epsilon; - } - else { - if 
(vert_allocated) { - MEM_freeN(vert); - } - if (edge_allocated) { - MEM_freeN(edge); - } - } + data->sphere_radius = epsilon; } /* Builds a bvh tree where nodes are the edges of the given em */ @@ -742,8 +693,6 @@ BVHTree *bvhtree_from_editmesh_edges_ex( data->em = em; data->nearest_callback = NULL; /* TODO */ data->raycast_callback = NULL; /* TODO */ - /* TODO: not urgent however since users currently define own callbacks */ - data->nearest_to_ray_callback = NULL; } return tree; @@ -795,11 +744,21 @@ BVHTree *bvhtree_from_mesh_edges( /* printf("BVHTree is already build, using cached tree\n"); */ } - /* Setup BVHTreeFromMesh */ - bvhtree_from_mesh_edges_setup_data( - data, tree, true, epsilon, vert, vert_allocated, edge, edge_allocated); - - return data->tree; + if (tree) { + /* Setup BVHTreeFromMesh */ + bvhtree_from_mesh_edges_setup_data( + data, tree, true, epsilon, vert, vert_allocated, edge, edge_allocated); + } + else { + if (vert_allocated) { + MEM_freeN(vert); + } + if (edge_allocated) { + MEM_freeN(edge); + } + memset(data, 0, sizeof(*data)); + } + return tree; } /** @@ -810,8 +769,8 @@ BVHTree *bvhtree_from_mesh_edges( */ BVHTree *bvhtree_from_mesh_edges_ex( BVHTreeFromMesh *data, - MVert *vert, const bool vert_allocated, - MEdge *edge, const int edges_num, const bool edge_allocated, + const MVert *vert, const bool vert_allocated, + const MEdge *edge, const int edges_num, const bool edge_allocated, const BLI_bitmap *edges_mask, int edges_num_active, float epsilon, int tree_type, int axis) { @@ -823,7 +782,7 @@ BVHTree *bvhtree_from_mesh_edges_ex( bvhtree_from_mesh_edges_setup_data( data, tree, false, epsilon, vert, vert_allocated, edge, edge_allocated); - return data->tree; + return tree; } /** \} */ @@ -836,7 +795,7 @@ BVHTree *bvhtree_from_mesh_edges_ex( static BVHTree *bvhtree_from_mesh_faces_create_tree( float epsilon, int tree_type, int axis, - MVert *vert, MFace *face, const int faces_num, + const MVert *vert, const MFace *face, const int faces_num, 
const BLI_bitmap *faces_mask, int faces_num_active) { BVHTree *tree = NULL; @@ -880,34 +839,23 @@ static BVHTree *bvhtree_from_mesh_faces_create_tree( static void bvhtree_from_mesh_faces_setup_data( BVHTreeFromMesh *data, BVHTree *tree, const bool is_cached, float epsilon, - MVert *vert, const bool vert_allocated, - MFace *face, const bool face_allocated) + const MVert *vert, const bool vert_allocated, + const MFace *face, const bool face_allocated) { memset(data, 0, sizeof(*data)); - if (tree) { - data->tree = tree; - data->cached = is_cached; + data->tree = tree; + data->cached = is_cached; - data->nearest_callback = mesh_faces_nearest_point; - data->raycast_callback = mesh_faces_spherecast; - data->nearest_to_ray_callback = NULL; + data->nearest_callback = mesh_faces_nearest_point; + data->raycast_callback = mesh_faces_spherecast; - data->vert = vert; - data->vert_allocated = vert_allocated; - data->face = face; - data->face_allocated = face_allocated; + data->vert = vert; + data->vert_allocated = vert_allocated; + data->face = face; + data->face_allocated = face_allocated; - data->sphere_radius = epsilon; - } - else { - if (vert_allocated) { - MEM_freeN(vert); - } - if (face_allocated) { - MEM_freeN(face); - } - } + data->sphere_radius = epsilon; } /* Builds a bvh tree where nodes are the tesselated faces of the given dm */ @@ -950,10 +898,21 @@ BVHTree *bvhtree_from_mesh_faces( /* printf("BVHTree is already build, using cached tree\n"); */ } - /* Setup BVHTreeFromMesh */ - bvhtree_from_mesh_faces_setup_data(data, tree, true, epsilon, vert, vert_allocated, face, face_allocated); - - return data->tree; + if (tree) { + /* Setup BVHTreeFromMesh */ + bvhtree_from_mesh_faces_setup_data( + data, tree, true, epsilon, vert, vert_allocated, face, face_allocated); + } + else { + if (vert_allocated) { + MEM_freeN(vert); + } + if (face_allocated) { + MEM_freeN(face); + } + memset(data, 0, sizeof(*data)); + } + return tree; } /** @@ -964,8 +923,8 @@ BVHTree 
*bvhtree_from_mesh_faces( * \param numFaces_active if >= 0, number of active faces to add to BVH tree (else will be computed from mask). */ BVHTree *bvhtree_from_mesh_faces_ex( - BVHTreeFromMesh *data, MVert *vert, const bool vert_allocated, - MFace *face, const int numFaces, const bool face_allocated, + BVHTreeFromMesh *data, const MVert *vert, const bool vert_allocated, + const MFace *face, const int numFaces, const bool face_allocated, const BLI_bitmap *faces_mask, int faces_num_active, float epsilon, int tree_type, int axis) { @@ -975,9 +934,10 @@ BVHTree *bvhtree_from_mesh_faces_ex( faces_mask, faces_num_active); /* Setup BVHTreeFromMesh */ - bvhtree_from_mesh_faces_setup_data(data, tree, false, epsilon, vert, vert_allocated, face, face_allocated); + bvhtree_from_mesh_faces_setup_data( + data, tree, false, epsilon, vert, vert_allocated, face, face_allocated); - return data->tree; + return tree; } /** \} */ @@ -1088,34 +1048,20 @@ static void bvhtree_from_mesh_looptri_setup_data( { memset(data, 0, sizeof(*data)); - if (tree) { - data->tree = tree; - data->cached = is_cached; + data->tree = tree; + data->cached = is_cached; - data->nearest_callback = mesh_looptri_nearest_point; - data->raycast_callback = mesh_looptri_spherecast; - data->nearest_to_ray_callback = NULL; + data->nearest_callback = mesh_looptri_nearest_point; + data->raycast_callback = mesh_looptri_spherecast; - data->vert = vert; - data->vert_allocated = vert_allocated; - data->loop = mloop; - data->loop_allocated = loop_allocated; - data->looptri = looptri; - data->looptri_allocated = looptri_allocated; + data->vert = vert; + data->vert_allocated = vert_allocated; + data->loop = mloop; + data->loop_allocated = loop_allocated; + data->looptri = looptri; + data->looptri_allocated = looptri_allocated; - data->sphere_radius = epsilon; - } - else { - if (vert_allocated) { - MEM_freeN((void *)vert); - } - if (loop_allocated) { - MEM_freeN((void *)mloop); - } - if (looptri_allocated) { - MEM_freeN((void 
*)looptri); - } - } + data->sphere_radius = epsilon; } /** @@ -1160,7 +1106,6 @@ BVHTree *bvhtree_from_editmesh_looptri_ex( data->tree = tree; data->nearest_callback = editmesh_looptri_nearest_point; data->raycast_callback = editmesh_looptri_spherecast; - data->nearest_to_ray_callback = NULL; data->sphere_radius = 0.0f; data->em = em; data->cached = bvhCache != NULL; @@ -1242,14 +1187,28 @@ BVHTree *bvhtree_from_mesh_looptri( /* printf("BVHTree is already build, using cached tree\n"); */ } - /* Setup BVHTreeFromMesh */ - bvhtree_from_mesh_looptri_setup_data( - data, tree, true, epsilon, - mvert, vert_allocated, - mloop, loop_allocated, - looptri, looptri_allocated); + if (tree) { + /* Setup BVHTreeFromMesh */ + bvhtree_from_mesh_looptri_setup_data( + data, tree, true, epsilon, + mvert, vert_allocated, + mloop, loop_allocated, + looptri, looptri_allocated); + } + else { + if (vert_allocated) { + MEM_freeN(mvert); + } + if (loop_allocated) { + MEM_freeN(mloop); + } + if (looptri_allocated) { + MEM_freeN((void *)looptri); + } + memset(data, 0, sizeof(*data)); + } - return data->tree; + return tree; } BVHTree *bvhtree_from_mesh_looptri_ex( @@ -1272,7 +1231,7 @@ BVHTree *bvhtree_from_mesh_looptri_ex( mloop, loop_allocated, looptri, looptri_allocated); - return data->tree; + return tree; } /** \} */ @@ -1292,29 +1251,27 @@ void free_bvhtree_from_editmesh(struct BVHTreeFromEditMesh *data) /* Frees data allocated by a call to bvhtree_from_mesh_*. 
*/ void free_bvhtree_from_mesh(struct BVHTreeFromMesh *data) { - if (data->tree) { - if (!data->cached) { - BLI_bvhtree_free(data->tree); - } - - if (data->vert_allocated) { - MEM_freeN((void *)data->vert); - } - if (data->edge_allocated) { - MEM_freeN((void *)data->edge); - } - if (data->face_allocated) { - MEM_freeN((void *)data->face); - } - if (data->loop_allocated) { - MEM_freeN((void *)data->loop); - } - if (data->looptri_allocated) { - MEM_freeN((void *)data->looptri); - } + if (data->tree && !data->cached) { + BLI_bvhtree_free(data->tree); + } - memset(data, 0, sizeof(*data)); + if (data->vert_allocated) { + MEM_freeN((void *)data->vert); + } + if (data->edge_allocated) { + MEM_freeN((void *)data->edge); + } + if (data->face_allocated) { + MEM_freeN((void *)data->face); } + if (data->loop_allocated) { + MEM_freeN((void *)data->loop); + } + if (data->looptri_allocated) { + MEM_freeN((void *)data->looptri); + } + + memset(data, 0, sizeof(*data)); } diff --git a/source/blender/blenkernel/intern/cdderivedmesh.c b/source/blender/blenkernel/intern/cdderivedmesh.c index 483fa977aff..7042b46330b 100644 --- a/source/blender/blenkernel/intern/cdderivedmesh.c +++ b/source/blender/blenkernel/intern/cdderivedmesh.c @@ -2428,8 +2428,12 @@ static DerivedMesh *cddm_copy_ex(DerivedMesh *source, dm->cd_flag = source->cd_flag; dm->dirty = source->dirty; - /* Tessellation data is never copied, so tag it here. */ - dm->dirty |= DM_DIRTY_TESS_CDLAYERS; + /* Tessellation data is never copied, so tag it here. + * Only tag dirty layers if we really ignored tessellation faces. 
+ */ + if (!copy_tessface_data) { + dm->dirty |= DM_DIRTY_TESS_CDLAYERS; + } CustomData_copy_data(&source->vertData, &dm->vertData, 0, 0, numVerts); CustomData_copy_data(&source->edgeData, &dm->edgeData, 0, 0, numEdges); diff --git a/source/blender/blenkernel/intern/curve.c b/source/blender/blenkernel/intern/curve.c index 90a514781d7..439abb1d593 100644 --- a/source/blender/blenkernel/intern/curve.c +++ b/source/blender/blenkernel/intern/curve.c @@ -89,20 +89,33 @@ void BKE_curve_editfont_free(Curve *cu) } } -void BKE_curve_editNurb_keyIndex_free(EditNurb *editnurb) +static void curve_editNurb_keyIndex_cv_free_cb(void *val) { - if (!editnurb->keyindex) { + CVKeyIndex *index = val; + MEM_freeN(index->orig_cv); + MEM_freeN(val); +} + +void BKE_curve_editNurb_keyIndex_delCV(GHash *keyindex, const void *cv) +{ + BLI_assert(keyindex != NULL); + BLI_ghash_remove(keyindex, cv, NULL, curve_editNurb_keyIndex_cv_free_cb); +} + +void BKE_curve_editNurb_keyIndex_free(GHash **keyindex) +{ + if (!(*keyindex)) { return; } - BLI_ghash_free(editnurb->keyindex, NULL, MEM_freeN); - editnurb->keyindex = NULL; + BLI_ghash_free(*keyindex, NULL, curve_editNurb_keyIndex_cv_free_cb); + *keyindex = NULL; } void BKE_curve_editNurb_free(Curve *cu) { if (cu->editnurb) { BKE_nurbList_free(&cu->editnurb->nurbs); - BKE_curve_editNurb_keyIndex_free(cu->editnurb); + BKE_curve_editNurb_keyIndex_free(&cu->editnurb->keyindex); MEM_freeN(cu->editnurb); cu->editnurb = NULL; } diff --git a/source/blender/blenkernel/intern/depsgraph.c b/source/blender/blenkernel/intern/depsgraph.c index 294a4ce76b7..678dc92a5f2 100644 --- a/source/blender/blenkernel/intern/depsgraph.c +++ b/source/blender/blenkernel/intern/depsgraph.c @@ -544,10 +544,16 @@ static void build_dag_object(DagForest *dag, DagNode *scenenode, Main *bmain, Sc if (ct->tar->type == OB_MESH) node3->customdata_mask |= CD_MASK_MDEFORMVERT; } - else if (ELEM(con->type, CONSTRAINT_TYPE_FOLLOWPATH, CONSTRAINT_TYPE_CLAMPTO, CONSTRAINT_TYPE_SPLINEIK)) + 
else if (ELEM(con->type, CONSTRAINT_TYPE_FOLLOWPATH, + CONSTRAINT_TYPE_CLAMPTO, + CONSTRAINT_TYPE_SPLINEIK, + CONSTRAINT_TYPE_SHRINKWRAP)) + { dag_add_relation(dag, node3, node, DAG_RL_DATA_DATA | DAG_RL_OB_DATA, cti->name); - else + } + else { dag_add_relation(dag, node3, node, DAG_RL_OB_DATA, cti->name); + } } } @@ -881,8 +887,12 @@ static void build_dag_object(DagForest *dag, DagNode *scenenode, Main *bmain, Sc if (obt->type == OB_MESH) node2->customdata_mask |= CD_MASK_MDEFORMVERT; } - else + else if (cti->type == CONSTRAINT_TYPE_SHRINKWRAP) { + dag_add_relation(dag, node2, node, DAG_RL_DATA_DATA | DAG_RL_OB_DATA, cti->name); + } + else { dag_add_relation(dag, node2, node, DAG_RL_OB_OB, cti->name); + } } addtoroot = 0; } diff --git a/source/blender/blenkernel/intern/displist.c b/source/blender/blenkernel/intern/displist.c index 49db75a0474..f8a9d57f579 100644 --- a/source/blender/blenkernel/intern/displist.c +++ b/source/blender/blenkernel/intern/displist.c @@ -819,7 +819,7 @@ static void curve_calc_modifiers_pre(Scene *scene, Object *ob, ListBase *nurb, if (editmode) required_mode |= eModifierMode_Editmode; - if (cu->editnurb == NULL) { + if (!editmode) { keyVerts = BKE_key_evaluate_object(ob, &numVerts); if (keyVerts) { diff --git a/source/blender/blenkernel/intern/effect.c b/source/blender/blenkernel/intern/effect.c index fe8f5ebdca6..4eee24b378f 100644 --- a/source/blender/blenkernel/intern/effect.c +++ b/source/blender/blenkernel/intern/effect.c @@ -848,6 +848,14 @@ static void do_physical_effector(EffectorCache *eff, EffectorData *efd, Effected break; case PFIELD_FORCE: normalize_v3(force); + if (pd->flag & PFIELD_GRAVITATION){ /* Option: Multiply by 1/distance^2 */ + if (efd->distance < FLT_EPSILON){ + strength = 0.0f; + } + else { + strength *= powf(efd->distance, -2.0f); + } + } mul_v3_fl(force, strength * efd->falloff); break; case PFIELD_VORTEX: diff --git a/source/blender/blenkernel/intern/library_remap.c 
b/source/blender/blenkernel/intern/library_remap.c index a408b498f18..b6f4621a0b3 100644 --- a/source/blender/blenkernel/intern/library_remap.c +++ b/source/blender/blenkernel/intern/library_remap.c @@ -179,6 +179,7 @@ static int foreach_libblock_remap_callback(void *user_data, ID *id_self, ID **id * on the other hand since they get reset to lib data on file open/reload it is indirect too... * Edit Mode is also a 'skip direct' case. */ const bool is_obj = (GS(id->name) == ID_OB); + const bool is_obj_proxy = (is_obj && (((Object *)id)->proxy || ((Object *)id)->proxy_group)); const bool is_obj_editmode = (is_obj && BKE_object_is_in_editmode((Object *)id)); const bool is_never_null = ((cb_flag & IDWALK_CB_NEVER_NULL) && (new_id == NULL) && (id_remap_data->flag & ID_REMAP_FORCE_NEVER_NULL_USAGE) == 0); @@ -231,7 +232,7 @@ static int foreach_libblock_remap_callback(void *user_data, ID *id_self, ID **id /* We cannot affect old_id->us directly, LIB_TAG_EXTRAUSER(_SET) are assumed to be set as needed, * that extra user is processed in final handling... */ } - if (!is_indirect) { + if (!is_indirect || is_obj_proxy) { id_remap_data->status |= ID_REMAP_IS_LINKED_DIRECT; } } diff --git a/source/blender/blenkernel/intern/mesh.c b/source/blender/blenkernel/intern/mesh.c index af02e02b017..befe1a4d70e 100644 --- a/source/blender/blenkernel/intern/mesh.c +++ b/source/blender/blenkernel/intern/mesh.c @@ -39,7 +39,9 @@ #include "BLI_utildefines.h" #include "BLI_math.h" +#include "BLI_linklist.h" #include "BLI_listbase.h" +#include "BLI_memarena.h" #include "BLI_edgehash.h" #include "BLI_string.h" @@ -66,6 +68,11 @@ #include "DEG_depsgraph.h" +/* Define for cases when you want extra validation of mesh + * after certain modifications. 
+ */ +// #undef VALIDATE_MESH + enum { MESHCMP_DVERT_WEIGHTMISMATCH = 1, MESHCMP_DVERT_GROUPMISMATCH, @@ -2048,7 +2055,7 @@ void BKE_mesh_mselect_active_set(Mesh *me, int index, int type) (me->mselect[me->totselect - 1].type == type)); } -void BKE_mesh_calc_normals_split(Mesh *mesh) +void BKE_mesh_calc_normals_split_ex(Mesh *mesh, MLoopNorSpaceArray *r_lnors_spacearr) { float (*r_loopnors)[3]; float (*polynors)[3]; @@ -2083,111 +2090,330 @@ void BKE_mesh_calc_normals_split(Mesh *mesh) BKE_mesh_normals_loop_split( mesh->mvert, mesh->totvert, mesh->medge, mesh->totedge, mesh->mloop, r_loopnors, mesh->totloop, mesh->mpoly, (const float (*)[3])polynors, mesh->totpoly, - (mesh->flag & ME_AUTOSMOOTH) != 0, mesh->smoothresh, NULL, clnors, NULL); + (mesh->flag & ME_AUTOSMOOTH) != 0, mesh->smoothresh, r_lnors_spacearr, clnors, NULL); if (free_polynors) { MEM_freeN(polynors); } } -/* Spli faces based on the edge angle. - * Matches behavior of face splitting in render engines. - */ -void BKE_mesh_split_faces(Mesh *mesh) +void BKE_mesh_calc_normals_split(Mesh *mesh) { - const int num_verts = mesh->totvert; - const int num_edges = mesh->totedge; - const int num_polys = mesh->totpoly; + BKE_mesh_calc_normals_split_ex(mesh, NULL); +} + +/* Split faces helper functions. */ + +typedef struct SplitFaceNewVert { + struct SplitFaceNewVert *next; + int new_index; + int orig_index; + float *vnor; +} SplitFaceNewVert; + +typedef struct SplitFaceNewEdge { + struct SplitFaceNewEdge *next; + int new_index; + int orig_index; + int v1; + int v2; +} SplitFaceNewEdge; + +/* Detect needed new vertices, and update loops' vertex indices accordingly. + * WARNING! Leaves mesh in invalid state. */ +static int split_faces_prepare_new_verts( + const Mesh *mesh, MLoopNorSpaceArray *lnors_spacearr, SplitFaceNewVert **new_verts, MemArena *memarena, + bool *r_need_vnors_recalc) +{ + /* Note: if lnors_spacearr is NULL, there is no autosmooth handling, and we only split out flat polys. 
*/ + const int num_loops = mesh->totloop; + int num_verts = mesh->totvert; MVert *mvert = mesh->mvert; - MEdge *medge = mesh->medge; MLoop *mloop = mesh->mloop; - MPoly *mpoly = mesh->mpoly; - float (*lnors)[3]; - int poly, num_new_verts = 0; - if ((mesh->flag & ME_AUTOSMOOTH) == 0) { - return; - } - BKE_mesh_tessface_clear(mesh); - /* Compute loop normals if needed. */ - if (!CustomData_has_layer(&mesh->ldata, CD_NORMAL)) { - BKE_mesh_calc_normals_split(mesh); - } - lnors = CustomData_get_layer(&mesh->ldata, CD_NORMAL); - /* Count number of vertices to be split. */ - for (poly = 0; poly < num_polys; poly++) { - MPoly *mp = &mpoly[poly]; - int loop; - for (loop = 0; loop < mp->totloop; loop++) { - MLoop *ml = &mloop[mp->loopstart + loop]; - MVert *mv = &mvert[ml->v]; - float vn[3]; - normal_short_to_float_v3(vn, mv->no); - if (!equals_v3v3(vn, lnors[mp->loopstart + loop])) { - num_new_verts++; + + BLI_bitmap *verts_used = BLI_BITMAP_NEW(num_verts, __func__); + + if (lnors_spacearr) { + BLI_bitmap *done_loops = BLI_BITMAP_NEW(num_loops, __func__); + + MLoop *ml = mloop; + MLoopNorSpace **lnor_space = lnors_spacearr->lspacearr; + for (int loop_idx = 0; loop_idx < num_loops; loop_idx++, ml++, lnor_space++) { + if (!BLI_BITMAP_TEST(done_loops, loop_idx)) { + const int vert_idx = ml->v; + const bool vert_used = BLI_BITMAP_TEST_BOOL(verts_used, vert_idx); + /* If vert is already used by another smooth fan, we need a new vert for this one. */ + const int new_vert_idx = vert_used ? num_verts++ : vert_idx; + + if ((*lnor_space)->loops) { + for (LinkNode *lnode = (*lnor_space)->loops; lnode; lnode = lnode->next) { + const int ml_fan_idx = GET_INT_FROM_POINTER(lnode->link); + BLI_BITMAP_ENABLE(done_loops, ml_fan_idx); + if (vert_used) { + mloop[ml_fan_idx].v = new_vert_idx; + } + } + } + else { + /* Single loop in this fan... 
*/ + BLI_BITMAP_ENABLE(done_loops, loop_idx); + if (vert_used) { + ml->v = new_vert_idx; + } + } + + if (!vert_used) { + BLI_BITMAP_ENABLE(verts_used, vert_idx); + /* We need to update that vertex's normal here, we won't go over it again. */ + /* This is important! *DO NOT* set vnor to final computed lnor, vnor should always be defined to + * 'automatic normal' value computed from its polys, not some custom normal. + * Fortunately, that's the loop normal space's 'lnor' reference vector. ;) */ + normal_float_to_short_v3(mvert[vert_idx].no, (*lnor_space)->vec_lnor); + } + else { + /* Add new vert to list. */ + SplitFaceNewVert *new_vert = BLI_memarena_alloc(memarena, sizeof(*new_vert)); + new_vert->orig_index = vert_idx; + new_vert->new_index = new_vert_idx; + new_vert->vnor = (*lnor_space)->vec_lnor; /* See note above. */ + new_vert->next = *new_verts; + *new_verts = new_vert; + } } } + + MEM_freeN(done_loops); } - if (num_new_verts == 0) { - /* No new vertices are to be added, can do early exit. */ - return; - } - /* Reallocate all vert and edge related data. */ - mesh->totvert += num_new_verts; - mesh->totedge += 2 * num_new_verts; - CustomData_realloc(&mesh->vdata, mesh->totvert); - CustomData_realloc(&mesh->edata, mesh->totedge); - /* Update pointers to a newly allocated memory. */ - BKE_mesh_update_customdata_pointers(mesh, false); - mvert = mesh->mvert; - medge = mesh->medge; - /* Perform actual vertex split. */ - num_new_verts = 0; - for (poly = 0; poly < num_polys; poly++) { - MPoly *mp = &mpoly[poly]; - int loop; - for (loop = 0; loop < mp->totloop; loop++) { - int poly_loop = mp->loopstart + loop; - MLoop *ml = &mloop[poly_loop]; - MVert *mv = &mvert[ml->v]; - float vn[3]; - normal_short_to_float_v3(vn, mv->no); - if (!equals_v3v3(vn, lnors[mp->loopstart + loop])) { - int poly_loop_prev = mp->loopstart + (loop + mp->totloop - 1) % mp->totloop; - MLoop *ml_prev = &mloop[poly_loop_prev]; - int new_edge_prev, new_edge; - /* Cretae new vertex. 
*/ - int new_vert = num_verts + num_new_verts; - CustomData_copy_data(&mesh->vdata, &mesh->vdata, - ml->v, new_vert, 1); - normal_float_to_short_v3(mvert[new_vert].no, - lnors[poly_loop]); - /* Create new edges. */ - new_edge_prev = num_edges + 2 * num_new_verts; - new_edge = num_edges + 2 * num_new_verts + 1; - CustomData_copy_data(&mesh->edata, &mesh->edata, - ml_prev->e, new_edge_prev, 1); - CustomData_copy_data(&mesh->edata, &mesh->edata, - ml->e, new_edge, 1); - if (medge[new_edge_prev].v1 == ml->v) { - medge[new_edge_prev].v1 = new_vert; + else { + /* No loop normal spaces available, we only split out flat polys. */ + const int num_polys = mesh->totpoly; + const MPoly *mpoly = mesh->mpoly; + + /* We do that in two loops, to keep original edges/verts to smooth polys preferencially. */ + const MPoly *mp = mpoly; + for (int i = 0; i < num_polys; i++, mp++) { + if (mp->flag & ME_SMOOTH) { + const MLoop *ml = &mloop[mp->loopstart]; + for (int j = 0; j < mp->totloop; j++, ml++) { + /* Just mark the vertex as used/reserved, that way neighbor flat polys, if any, + * will have to create their own. */ + BLI_BITMAP_ENABLE(verts_used, ml->v); } - else { - medge[new_edge_prev].v2 = new_vert; + } + } + + mp = mpoly; + for (int i = 0; i < num_polys; i++, mp++) { + if (!(mp->flag & ME_SMOOTH)) { + MLoop *ml = &mloop[mp->loopstart]; + for (int j = 0; j < mp->totloop; j++, ml++) { + const int vert_idx = ml->v; + + if (BLI_BITMAP_TEST(verts_used, vert_idx)) { + /* Add new vert to list. */ + const int new_vert_idx = num_verts++; + ml->v = new_vert_idx; + + SplitFaceNewVert *new_vert = BLI_memarena_alloc(memarena, sizeof(*new_vert)); + new_vert->orig_index = vert_idx; + new_vert->new_index = new_vert_idx; + new_vert->vnor = NULL; /* See note below about normals. 
*/ + new_vert->next = *new_verts; + *new_verts = new_vert; + } + else { + BLI_BITMAP_ENABLE(verts_used, vert_idx); + } } - if (medge[new_edge].v1 == ml->v) { - medge[new_edge].v1 = new_vert; + /* Note: there is no way to get new normals for smooth vertices here (and we don't have direct access + * to poly normals either for flat ones), so we'll have to recompute all vnors at the end... */ + *r_need_vnors_recalc = true; + } + } + } + + MEM_freeN(verts_used); + + return num_verts - mesh->totvert; +} + +/* Detect needed new edges, and update accordingly loops' edge indices. + * WARNING! Leaves mesh in invalid state. */ +static int split_faces_prepare_new_edges( + const Mesh *mesh, SplitFaceNewEdge **new_edges, MemArena *memarena) +{ + const int num_polys = mesh->totpoly; + int num_edges = mesh->totedge; + MEdge *medge = mesh->medge; + MLoop *mloop = mesh->mloop; + const MPoly *mpoly = mesh->mpoly; + + BLI_bitmap *edges_used = BLI_BITMAP_NEW(num_edges, __func__); + EdgeHash *edges_hash = BLI_edgehash_new_ex(__func__, num_edges); + + const MPoly *mp = mpoly; + for (int poly_idx = 0; poly_idx < num_polys; poly_idx++, mp++) { + MLoop *ml_prev = &mloop[mp->loopstart + mp->totloop - 1]; + MLoop *ml = &mloop[mp->loopstart]; + for (int loop_idx = 0; loop_idx < mp->totloop; loop_idx++, ml++) { + void **eval; + if (!BLI_edgehash_ensure_p(edges_hash, ml_prev->v, ml->v, &eval)) { + const int edge_idx = ml_prev->e; + + /* That edge has not been encountered yet, define it. */ + if (BLI_BITMAP_TEST(edges_used, edge_idx)) { + /* Original edge has already been used, we need to define a new one. 
*/ + const int new_edge_idx = num_edges++; + *eval = SET_INT_IN_POINTER(new_edge_idx); + ml_prev->e = new_edge_idx; + + SplitFaceNewEdge *new_edge = BLI_memarena_alloc(memarena, sizeof(*new_edge)); + new_edge->orig_index = edge_idx; + new_edge->new_index = new_edge_idx; + new_edge->v1 = ml_prev->v; + new_edge->v2 = ml->v; + new_edge->next = *new_edges; + *new_edges = new_edge; } else { - medge[new_edge].v2 = new_vert; + /* We can re-use original edge. */ + medge[edge_idx].v1 = ml_prev->v; + medge[edge_idx].v2 = ml->v; + *eval = SET_INT_IN_POINTER(edge_idx); + BLI_BITMAP_ENABLE(edges_used, edge_idx); } - - ml->v = new_vert; - ml_prev->e = new_edge_prev; - ml->e = new_edge; - num_new_verts++; } + else { + /* Edge already known, just update loop's edge index. */ + ml_prev->e = GET_INT_FROM_POINTER(*eval); + } + + ml_prev = ml; } } + + MEM_freeN(edges_used); + BLI_edgehash_free(edges_hash, NULL); + + return num_edges - mesh->totedge; +} + +/* Perform actual split of vertices. */ +static void split_faces_split_new_verts( + Mesh *mesh, SplitFaceNewVert *new_verts, const int num_new_verts) +{ + const int num_verts = mesh->totvert - num_new_verts; + MVert *mvert = mesh->mvert; + + /* Remember new_verts is a single linklist, so its items are in reversed order... */ + MVert *new_mv = &mvert[mesh->totvert - 1]; + for (int i = mesh->totvert - 1; i >= num_verts ; i--, new_mv--, new_verts = new_verts->next) { + BLI_assert(new_verts->new_index == i); + BLI_assert(new_verts->new_index != new_verts->orig_index); + CustomData_copy_data(&mesh->vdata, &mesh->vdata, new_verts->orig_index, i, 1); + if (new_verts->vnor) { + normal_float_to_short_v3(new_mv->no, new_verts->vnor); + } + } +} + +/* Perform actual split of edges. 
*/ +static void split_faces_split_new_edges( + Mesh *mesh, SplitFaceNewEdge *new_edges, const int num_new_edges) +{ + const int num_edges = mesh->totedge - num_new_edges; + MEdge *medge = mesh->medge; + + /* Remember new_edges is a single linklist, so its items are in reversed order... */ + MEdge *new_med = &medge[mesh->totedge - 1]; + for (int i = mesh->totedge - 1; i >= num_edges ; i--, new_med--, new_edges = new_edges->next) { + BLI_assert(new_edges->new_index == i); + BLI_assert(new_edges->new_index != new_edges->orig_index); + CustomData_copy_data(&mesh->edata, &mesh->edata, new_edges->orig_index, i, 1); + new_med->v1 = new_edges->v1; + new_med->v2 = new_edges->v2; + } +} + +/* Split faces based on the edge angle and loop normals. + * Matches behavior of face splitting in render engines. + * + * NOTE: Will leave CD_NORMAL loop data layer which is + * used by render engines to set shading up. + */ +void BKE_mesh_split_faces(Mesh *mesh, bool free_loop_normals) +{ + const int num_polys = mesh->totpoly; + + if (num_polys == 0) { + return; + } + BKE_mesh_tessface_clear(mesh); + + MLoopNorSpaceArray *lnors_spacearr = NULL; + MemArena *memarena; + bool need_vnors_recalc = false; + + if (mesh->flag & ME_AUTOSMOOTH) { + lnors_spacearr = MEM_callocN(sizeof(*lnors_spacearr), __func__); + /* Compute loop normals and loop normal spaces (a.k.a. smooth fans of faces around vertices). */ + BKE_mesh_calc_normals_split_ex(mesh, lnors_spacearr); + /* Stealing memarena from loop normals space array. */ + memarena = lnors_spacearr->mem; + } + else { + /* We still have to split out flat faces... */ + memarena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__); + } + + SplitFaceNewVert *new_verts = NULL; + SplitFaceNewEdge *new_edges = NULL; + + /* Detect loop normal spaces (a.k.a. smooth fans) that will need a new vert. 
*/ + const int num_new_verts = split_faces_prepare_new_verts(mesh, lnors_spacearr, &new_verts, memarena, &need_vnors_recalc); + + if (num_new_verts > 0) { + /* Reminder: beyond this point, there is no way out, mesh is in invalid state (due to early-reassignment of + * loops' vertex and edge indices to new, to-be-created split ones). */ + + const int num_new_edges = split_faces_prepare_new_edges(mesh, &new_edges, memarena); + BLI_assert(num_new_edges > 0); + + /* Reallocate all vert and edge related data. */ + mesh->totvert += num_new_verts; + mesh->totedge += num_new_edges; + CustomData_realloc(&mesh->vdata, mesh->totvert); + CustomData_realloc(&mesh->edata, mesh->totedge); + /* Update pointers to a newly allocated memory. */ + BKE_mesh_update_customdata_pointers(mesh, false); + + /* Perform actual split of vertices and edges. */ + split_faces_split_new_verts(mesh, new_verts, num_new_verts); + split_faces_split_new_edges(mesh, new_edges, num_new_edges); + } + + /* Note: after this point mesh is expected to be valid again. */ + + /* CD_NORMAL is expected to be temporary only. */ + if (free_loop_normals) { + CustomData_free_layers(&mesh->ldata, CD_NORMAL, mesh->totloop); + } + + if (lnors_spacearr) { + /* Also frees new_verts/edges temp data, since we used its memarena to allocate them. 
*/ + BKE_lnor_spacearr_free(lnors_spacearr); + MEM_freeN(lnors_spacearr); + } + else { + BLI_memarena_free(memarena); + } + + if (need_vnors_recalc) { + BKE_mesh_calc_normals(mesh); + } +#ifdef VALIDATE_MESH + BKE_mesh_validate(mesh, true, true); +#endif } /* settings: 1 - preview, 2 - render */ diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c index f9eba118383..003b7b784d5 100644 --- a/source/blender/blenkernel/intern/mesh_evaluate.c +++ b/source/blender/blenkernel/intern/mesh_evaluate.c @@ -1152,7 +1152,6 @@ void BKE_mesh_normals_loop_split( const bool use_split_normals, float split_angle, MLoopNorSpaceArray *r_lnors_spacearr, short (*clnors_data)[2], int *r_loop_to_poly) { - /* For now this is not supported. If we do not use split normals, we do not generate anything fancy! */ BLI_assert(use_split_normals || !(r_lnors_spacearr)); diff --git a/source/blender/blenkernel/intern/object.c b/source/blender/blenkernel/intern/object.c index ff8be5892e9..6e754755cf3 100644 --- a/source/blender/blenkernel/intern/object.c +++ b/source/blender/blenkernel/intern/object.c @@ -2236,66 +2236,6 @@ void BKE_boundbox_minmax(const BoundBox *bb, float obmat[4][4], float r_min[3], } } -/** - * Returns a BBox which each dimensions are at least epsilon. - * \note In case a given dimension needs to be enlarged, its final value will be in [epsilon, 3 * epsilon] range. - * - * \param bb the input bbox to check. - * \param bb_temp the temp bbox to modify (\a bb content is never changed). - * \param epsilon the minimum dimension to ensure. - * \return either bb (if nothing needed to be changed) or bb_temp. - */ -BoundBox *BKE_boundbox_ensure_minimum_dimensions(BoundBox *bb, BoundBox *bb_temp, const float epsilon) -{ - if (fabsf(bb->vec[0][0] - bb->vec[4][0]) < epsilon) { - /* Flat along X axis... 
*/ - *bb_temp = *bb; - bb = bb_temp; - bb->vec[0][0] -= epsilon; - bb->vec[1][0] -= epsilon; - bb->vec[2][0] -= epsilon; - bb->vec[3][0] -= epsilon; - bb->vec[4][0] += epsilon; - bb->vec[5][0] += epsilon; - bb->vec[6][0] += epsilon; - bb->vec[7][0] += epsilon; - } - - if (fabsf(bb->vec[0][1] - bb->vec[3][1]) < epsilon) { - /* Flat along Y axis... */ - if (bb != bb_temp) { - *bb_temp = *bb; - bb = bb_temp; - } - bb->vec[0][1] -= epsilon; - bb->vec[1][1] -= epsilon; - bb->vec[4][1] -= epsilon; - bb->vec[5][1] -= epsilon; - bb->vec[2][1] += epsilon; - bb->vec[3][1] += epsilon; - bb->vec[6][1] += epsilon; - bb->vec[7][1] += epsilon; - } - - if (fabsf(bb->vec[0][2] - bb->vec[1][2]) < epsilon) { - /* Flat along Z axis... */ - if (bb != bb_temp) { - *bb_temp = *bb; - bb = bb_temp; - } - bb->vec[0][2] -= epsilon; - bb->vec[3][2] -= epsilon; - bb->vec[4][2] -= epsilon; - bb->vec[7][2] -= epsilon; - bb->vec[1][2] += epsilon; - bb->vec[2][2] += epsilon; - bb->vec[5][2] += epsilon; - bb->vec[6][2] += epsilon; - } - - return bb; -} - BoundBox *BKE_object_boundbox_get(Object *ob) { BoundBox *bb = NULL; diff --git a/source/blender/blenkernel/intern/scene.c b/source/blender/blenkernel/intern/scene.c index 56bfe5d7ff1..906fa0134a0 100644 --- a/source/blender/blenkernel/intern/scene.c +++ b/source/blender/blenkernel/intern/scene.c @@ -1510,8 +1510,6 @@ static void scene_update_object_func(TaskPool * __restrict pool, void *taskdata, if (add_to_stats) { StatisicsEntry *entry; - BLI_assert(threadid < BLI_pool_get_num_threads(pool)); - entry = MEM_mallocN(sizeof(StatisicsEntry), "update thread statistics"); entry->object = object; entry->start_time = start_time; @@ -1631,10 +1629,11 @@ static bool scene_need_update_objects(Main *bmain) static void scene_update_objects(EvaluationContext *eval_ctx, Main *bmain, Scene *scene, Scene *scene_parent) { - TaskScheduler *task_scheduler = BLI_task_scheduler_get(); + TaskScheduler *task_scheduler; TaskPool *task_pool; ThreadedObjectUpdateState 
state; bool need_singlethread_pass; + bool need_free_scheduler; /* Early check for whether we need to invoke all the task-based * things (spawn new ppol, traverse dependency graph and so on). @@ -1651,6 +1650,15 @@ static void scene_update_objects(EvaluationContext *eval_ctx, Main *bmain, Scene state.scene = scene; state.scene_parent = scene_parent; + if (G.debug & G_DEBUG_DEPSGRAPH_NO_THREADS) { + task_scheduler = BLI_task_scheduler_create(1); + need_free_scheduler = true; + } + else { + task_scheduler = BLI_task_scheduler_get(); + need_free_scheduler = false; + } + /* Those are only needed when blender is run with --debug argument. */ if (G.debug & G_DEBUG_DEPSGRAPH) { const int tot_thread = BLI_task_scheduler_num_threads(task_scheduler); @@ -1665,9 +1673,6 @@ static void scene_update_objects(EvaluationContext *eval_ctx, Main *bmain, Scene #endif task_pool = BLI_task_pool_create(task_scheduler, &state); - if (G.debug & G_DEBUG_DEPSGRAPH_NO_THREADS) { - BLI_pool_set_num_threads(task_pool, 1); - } DAG_threaded_update_begin(scene, scene_update_object_add_task, task_pool); BLI_task_pool_work_and_wait(task_pool); @@ -1700,6 +1705,10 @@ static void scene_update_objects(EvaluationContext *eval_ctx, Main *bmain, Scene if (need_singlethread_pass) { scene_update_all_bases(eval_ctx, scene, scene_parent); } + + if (need_free_scheduler) { + BLI_task_scheduler_free(task_scheduler); + } } static void scene_update_tagged_recursive(EvaluationContext *eval_ctx, Main *bmain, Scene *scene, Scene *scene_parent) diff --git a/source/blender/blenkernel/intern/tracking_stabilize.c b/source/blender/blenkernel/intern/tracking_stabilize.c index 36b24fbb2dc..722fc89a75f 100644 --- a/source/blender/blenkernel/intern/tracking_stabilize.c +++ b/source/blender/blenkernel/intern/tracking_stabilize.c @@ -1167,7 +1167,8 @@ static void stabilization_calculate_data(StabContext *ctx, if (ctx->stab->flag & TRACKING_STABILIZE_SCALE) { *r_scale = expf(scale_step * scaleinf); /* Averaged in log scale */ - 
} else { + } + else { *r_scale = 1.0f; } @@ -1180,8 +1181,8 @@ static void stabilization_calculate_data(StabContext *ctx, */ get_animated_target_pos(ctx, framenr, target_pos); sub_v2_v2(r_translation, target_pos); - *r_angle -= get_animated_target_rot(ctx,framenr); - target_scale = get_animated_target_scale(ctx,framenr); + *r_angle -= get_animated_target_rot(ctx, framenr); + target_scale = get_animated_target_scale(ctx, framenr); if (target_scale != 0.0f) { *r_scale /= target_scale; /* target_scale is an expected/intended reference zoom value */ diff --git a/source/blender/blenlib/BLI_kdopbvh.h b/source/blender/blenlib/BLI_kdopbvh.h index 91d39801645..ba565fca522 100644 --- a/source/blender/blenlib/BLI_kdopbvh.h +++ b/source/blender/blenlib/BLI_kdopbvh.h @@ -95,10 +95,6 @@ typedef void (*BVHTree_NearestPointCallback)(void *userdata, int index, const fl /* callback must update hit in case it finds a nearest successful hit */ typedef void (*BVHTree_RayCastCallback)(void *userdata, int index, const BVHTreeRay *ray, BVHTreeRayHit *hit); -/* callback must update nearest in case it finds a nearest result */ -typedef void (*BVHTree_NearestToRayCallback)(void *userdata, const float ray_co[3], const float ray_dir[3], - const float scale[3], int index, BVHTreeNearest *nearest); - /* callback to check if 2 nodes overlap (use thread if intersection results need to be stored) */ typedef bool (*BVHTree_OverlapCallback)(void *userdata, int index_a, int index_b, int thread); @@ -143,18 +139,6 @@ int BLI_bvhtree_find_nearest( BVHTree *tree, const float co[3], BVHTreeNearest *nearest, BVHTree_NearestPointCallback callback, void *userdata); -int BLI_bvhtree_find_nearest_to_ray_angle( - BVHTree *tree, const float co[3], const float dir[3], - const bool ray_is_normalized, const float scale[3], - BVHTreeNearest *nearest, - BVHTree_NearestToRayCallback callback, void *userdata); - -int BLI_bvhtree_find_nearest_to_ray( - BVHTree *tree, const float co[3], const float dir[3], - const bool 
ray_is_normalized, const float scale[3], - BVHTreeNearest *nearest, - BVHTree_NearestToRayCallback callback, void *userdata); - int BLI_bvhtree_ray_cast_ex( BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit, BVHTree_RayCastCallback callback, void *userdata, diff --git a/source/blender/blenlib/BLI_math_geom.h b/source/blender/blenlib/BLI_math_geom.h index 4a85e859c16..f1d9c9571f2 100644 --- a/source/blender/blenlib/BLI_math_geom.h +++ b/source/blender/blenlib/BLI_math_geom.h @@ -298,23 +298,6 @@ bool isect_ray_aabb_v3_simple( const float bb_min[3], const float bb_max[3], float *tmin, float *tmax); -struct NearestRayToAABB_Precalc { - float ray_origin[3]; - float ray_direction[3]; - float ray_inv_dir[3]; - float cdot_axis[3]; - float idiag_sq[3]; - bool sign[3]; -}; - -void dist_squared_ray_to_aabb_v3_precalc( - struct NearestRayToAABB_Precalc *data, - const float ray_origin[3], const float ray_direction[3]); -float dist_squared_ray_to_aabb_v3( - const struct NearestRayToAABB_Precalc *data, - const float bb_min[3], const float bb_max[3], - bool r_axis_closest[3]); - /* other */ bool isect_sweeping_sphere_tri_v3(const float p1[3], const float p2[3], const float radius, const float v0[3], const float v1[3], const float v2[3], float *r_lambda, float ipoint[3]); diff --git a/source/blender/blenlib/BLI_rect.h b/source/blender/blenlib/BLI_rect.h index 59bf3644912..041679ef876 100644 --- a/source/blender/blenlib/BLI_rect.h +++ b/source/blender/blenlib/BLI_rect.h @@ -47,6 +47,8 @@ bool BLI_rcti_is_empty(const struct rcti *rect); bool BLI_rctf_is_empty(const struct rctf *rect); void BLI_rctf_init(struct rctf *rect, float xmin, float xmax, float ymin, float ymax); void BLI_rcti_init(struct rcti *rect, int xmin, int xmax, int ymin, int ymax); +void BLI_rctf_init_pt_radius(struct rctf *rect, const float xy[2], float size); +void BLI_rcti_init_pt_radius(struct rcti *rect, const int xy[2], int size); void BLI_rcti_init_minmax(struct rcti 
*rect); void BLI_rctf_init_minmax(struct rctf *rect); void BLI_rcti_do_minmax_v(struct rcti *rect, const int xy[2]); diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 967e0be6d0a..c3c587275e1 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -81,6 +81,7 @@ typedef void (*TaskFreeFunction)(TaskPool *__restrict pool, void *taskdata, int TaskPool *BLI_task_pool_create(TaskScheduler *scheduler, void *userdata); TaskPool *BLI_task_pool_create_background(TaskScheduler *scheduler, void *userdata); +TaskPool *BLI_task_pool_create_suspended(TaskScheduler *scheduler, void *userdata); void BLI_task_pool_free(TaskPool *pool); void BLI_task_pool_push_ex( @@ -95,14 +96,6 @@ void BLI_task_pool_push_from_thread(TaskPool *pool, TaskRunFunction run, void BLI_task_pool_work_and_wait(TaskPool *pool); /* cancel all tasks, keep worker threads running */ void BLI_task_pool_cancel(TaskPool *pool); -/* stop all worker threads */ -void BLI_task_pool_stop(TaskPool *pool); - -/* get number of threads allowed to be used by this pool */ -int BLI_pool_get_num_threads(TaskPool *pool); - -/* set number of threads allowed to be used by this pool */ -void BLI_pool_set_num_threads(TaskPool *pool, int num_threads); /* for worker threads, test if canceled */ bool BLI_task_pool_canceled(TaskPool *pool); @@ -113,9 +106,6 @@ void *BLI_task_pool_userdata(TaskPool *pool); /* optional mutex to use from run function */ ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool); -/* number of tasks done, for stats, don't use this to make decisions */ -size_t BLI_task_pool_tasks_done(TaskPool *pool); - /* Parallel for routines */ typedef void (*TaskParallelRangeFunc)(void *userdata, const int iter); typedef void (*TaskParallelRangeFuncEx)(void *userdata, void *userdata_chunk, const int iter, const int thread_id); diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c index 
b14007a88cb..19d9711922e 100644 --- a/source/blender/blenlib/intern/BLI_kdopbvh.c +++ b/source/blender/blenlib/intern/BLI_kdopbvh.c @@ -159,29 +159,6 @@ typedef struct BVHRayCastData { BVHTreeRayHit hit; } BVHRayCastData; -typedef struct BVHNearestRayData { - BVHTree *tree; - BVHTree_NearestToRayCallback callback; - void *userdata; - - struct { - bool sign[3]; - float origin[3]; - float direction[3]; - - float direction_scaled_square[3]; - float inv_dir[3]; - - float cdot_axis[3]; - } ray; - - bool pick_smallest[3]; - - BVHTreeNearest nearest; - - float scale[3]; -} BVHNearestRayData; - /** \} */ @@ -1900,453 +1877,6 @@ void BLI_bvhtree_ray_cast_all( /* -------------------------------------------------------------------- */ -/** \name BLI_bvhtree_find_nearest_to_ray functions - * - * \{ */ - -static void dist_squared_ray_to_aabb_scaled_v3_precalc( - BVHNearestRayData *data, - const float ray_origin[3], const float ray_direction[3], - const bool ray_is_normalized, const float scale[3]) -{ - if (scale) { - copy_v3_v3(data->scale, scale); - } - else { - copy_v3_fl(data->scale, 1.0f); - } - /* un-normalize ray */ - if (ray_is_normalized && scale && - (data->scale[0] != 1.0f || data->scale[1] != 1.0f || data->scale[2] != 1.0f)) - { - data->ray.direction[0] = ray_direction[0] * data->scale[0]; - data->ray.direction[1] = ray_direction[1] * data->scale[1]; - data->ray.direction[2] = ray_direction[2] * data->scale[2]; - - mul_v3_v3fl(data->ray.direction, ray_direction, 1 / len_v3(data->ray.direction)); - } - else { - copy_v3_v3(data->ray.direction, ray_direction); - } - - float dir_sq[3]; - - for (int i = 0; i < 3; i++) { - data->ray.origin[i] = ray_origin[i]; - data->ray.inv_dir[i] = (data->ray.direction[i] != 0.0f) ? 
- (1.0f / data->ray.direction[i]) : FLT_MAX; - /* It has to be in function of `ray.inv_dir`, - * since the division of 1 by 0.0f, can be -inf or +inf */ - data->ray.sign[i] = (data->ray.inv_dir[i] < 0.0f); - - data->ray.direction_scaled_square[i] = data->ray.direction[i] * data->scale[i]; - - dir_sq[i] = SQUARE(data->ray.direction_scaled_square[i]); - - data->ray.direction_scaled_square[i] *= data->scale[i]; - } - - /* `diag_sq` Length square of each face diagonal */ - float diag_sq[3] = { - dir_sq[1] + dir_sq[2], - dir_sq[0] + dir_sq[2], - dir_sq[0] + dir_sq[1], - }; - - data->ray.cdot_axis[0] = (diag_sq[0] != 0.0f) ? data->ray.direction[0] / diag_sq[0] : FLT_MAX; - data->ray.cdot_axis[1] = (diag_sq[1] != 0.0f) ? data->ray.direction[1] / diag_sq[1] : FLT_MAX; - data->ray.cdot_axis[2] = (diag_sq[2] != 0.0f) ? data->ray.direction[2] / diag_sq[2] : FLT_MAX; -} - -/** - * Returns the squared distance from a ray to a bound-box `AABB`. - * It is based on `fast_ray_nearest_hit` solution to obtain - * the coordinates of the nearest edge of Bound Box to the ray - */ -MINLINE float dist_squared_ray_to_aabb_scaled_v3__impl( - const BVHNearestRayData *data, - const float bv[6], float *r_depth_sq, bool r_axis_closest[3]) -{ - - /* `tmin` is a vector that has the smaller distances to each of the - * infinite planes of the `AABB` faces (hit in nearest face X plane, - * nearest face Y plane and nearest face Z plane) */ - float local_bvmin[3], local_bvmax[3]; - - if (data->ray.sign[0]) { - local_bvmin[0] = bv[1]; - local_bvmax[0] = bv[0]; - } - else { - local_bvmin[0] = bv[0]; - local_bvmax[0] = bv[1]; - } - - if (data->ray.sign[1]) { - local_bvmin[1] = bv[3]; - local_bvmax[1] = bv[2]; - } - else { - local_bvmin[1] = bv[2]; - local_bvmax[1] = bv[3]; - } - - if (data->ray.sign[2]) { - local_bvmin[2] = bv[5]; - local_bvmax[2] = bv[4]; - } - else { - local_bvmin[2] = bv[4]; - local_bvmax[2] = bv[5]; - } - - sub_v3_v3(local_bvmin, data->ray.origin); - sub_v3_v3(local_bvmax, 
data->ray.origin); - - const float tmin[3] = { - local_bvmin[0] * data->ray.inv_dir[0], - local_bvmin[1] * data->ray.inv_dir[1], - local_bvmin[2] * data->ray.inv_dir[2], - }; - - /* `tmax` is a vector that has the longer distances to each of the - * infinite planes of the `AABB` faces (hit in farthest face X plane, - * farthest face Y plane and farthest face Z plane) */ - const float tmax[3] = { - local_bvmax[0] * data->ray.inv_dir[0], - local_bvmax[1] * data->ray.inv_dir[1], - local_bvmax[2] * data->ray.inv_dir[2], - }; - /* `v1` and `v3` is be the coordinates of the nearest `AABB` edge to the ray*/ - float v1[3], v2[3]; - /* `rtmin` is the highest value of the smaller distances. == max_axis_v3(tmin) - * `rtmax` is the lowest value of longer distances. == min_axis_v3(tmax)*/ - float rtmin, rtmax, mul; - /* `main_axis` is the axis equivalent to edge close to the ray */ - int main_axis; - - r_axis_closest[0] = false; - r_axis_closest[1] = false; - r_axis_closest[2] = false; - - /* *** min_axis_v3(tmax) *** */ - if ((tmax[0] <= tmax[1]) && (tmax[0] <= tmax[2])) { - // printf("# Hit in X %s\n", data->sign[0] ? "min", "max"); - rtmax = tmax[0]; - v1[0] = v2[0] = local_bvmax[0]; - mul = local_bvmax[0] * data->ray.direction_scaled_square[0]; - main_axis = 3; - r_axis_closest[0] = data->ray.sign[0]; - } - else if ((tmax[1] <= tmax[0]) && (tmax[1] <= tmax[2])) { - // printf("# Hit in Y %s\n", data->sign[1] ? "min", "max"); - rtmax = tmax[1]; - v1[1] = v2[1] = local_bvmax[1]; - mul = local_bvmax[1] * data->ray.direction_scaled_square[1]; - main_axis = 2; - r_axis_closest[1] = data->ray.sign[1]; - } - else { - // printf("# Hit in Z %s\n", data->sign[2] ? "min", "max"); - rtmax = tmax[2]; - v1[2] = v2[2] = local_bvmax[2]; - mul = local_bvmax[2] * data->ray.direction_scaled_square[2]; - main_axis = 1; - r_axis_closest[2] = data->ray.sign[2]; - } - - /* *** max_axis_v3(tmin) *** */ - if ((tmin[0] >= tmin[1]) && (tmin[0] >= tmin[2])) { - // printf("# To X %s\n", data->sign[0] ? 
"max", "min"); - rtmin = tmin[0]; - v1[0] = v2[0] = local_bvmin[0]; - mul += local_bvmin[0] * data->ray.direction_scaled_square[0]; - main_axis -= 3; - r_axis_closest[0] = !data->ray.sign[0]; - } - else if ((tmin[1] >= tmin[0]) && (tmin[1] >= tmin[2])) { - // printf("# To Y %s\n", data->sign[1] ? "max", "min"); - rtmin = tmin[1]; - v1[1] = v2[1] = local_bvmin[1]; - mul += local_bvmin[1] * data->ray.direction_scaled_square[1]; - main_axis -= 1; - r_axis_closest[1] = !data->ray.sign[1]; - } - else { - // printf("# To Z %s\n", data->sign[2] ? "max", "min"); - rtmin = tmin[2]; - v1[2] = v2[2] = local_bvmin[2]; - mul += local_bvmin[2] * data->ray.direction_scaled_square[2]; - main_axis -= 2; - r_axis_closest[2] = !data->ray.sign[2]; - } - /* *** end min/max axis *** */ - - if (main_axis < 0) - main_axis += 3; - - /* if rtmin < rtmax, ray intersect `AABB` */ - if (rtmin <= rtmax) { -#ifdef IGNORE_BEHIND_RAY - /* `if rtmax < depth_min`, the whole `AABB` is behind us */ - if (rtmax < min_depth) { - return fallback; - } -#endif - const float proj = rtmin * data->ray.direction[main_axis]; - - if (data->ray.sign[main_axis]) - r_axis_closest[main_axis] = (proj - local_bvmax[main_axis]) < (local_bvmin[main_axis] - proj); - else - r_axis_closest[main_axis] = (proj - local_bvmin[main_axis]) < (local_bvmax[main_axis] - proj); - - //if (r_depth_sq) - // *r_depth_sq = SQUARE(rtmin); - - return 0.0f; - } -#ifdef IGNORE_BEHIND_RAY - /* `if rtmin < depth_min`, the whole `AABB` is behing us */ - else if (rtmin < min_depth) { - return fallback; - } -#endif - - if (data->ray.sign[main_axis]) { - v1[main_axis] = local_bvmax[main_axis]; - v2[main_axis] = local_bvmin[main_axis]; - } - else { - v1[main_axis] = local_bvmin[main_axis]; - v2[main_axis] = local_bvmax[main_axis]; - } - { - /* `proj` equals to nearest point on the ray closest to the edge `v1 v2` of the `AABB`. 
*/ - const float proj = mul * data->ray.cdot_axis[main_axis]; - float depth_sq, r_point[3]; - if (v1[main_axis] > proj) { /* the nearest point to the ray is the point v1 */ - r_axis_closest[main_axis] = true; - /* `depth` is equivalent the distance of the the projection of v1 on the ray */ - depth_sq = mul + data->ray.direction_scaled_square[main_axis] * v1[main_axis]; - - copy_v3_v3(r_point, v1); - } - else if (v2[main_axis] < proj) { /* the nearest point of the ray is the point v2 */ - r_axis_closest[main_axis] = false; - - depth_sq = mul + data->ray.direction_scaled_square[main_axis] * v2[main_axis]; - - copy_v3_v3(r_point, v2); - } - else { /* the nearest point of the ray is on the edge of the `AABB`. */ - r_axis_closest[main_axis] = (proj - v1[main_axis]) < (v2[main_axis] - proj); - - depth_sq = mul + data->ray.direction_scaled_square[main_axis] * proj; -#if 0 - r_point[0] = main_axis == 0 ? proj : v2[0]; - r_point[1] = main_axis == 1 ? proj : v2[1]; - r_point[2] = main_axis == 2 ? proj : v2[2]; -#else - v2[main_axis] = proj; - copy_v3_v3(r_point, v2); -#endif - } - depth_sq *= depth_sq; - - if (r_depth_sq) - *r_depth_sq = depth_sq; - - /* TODO: scale can be optional */ - r_point[0] *= data->scale[0]; - r_point[1] *= data->scale[1]; - r_point[2] *= data->scale[2]; - - return len_squared_v3(r_point) - depth_sq; - } -} - -/** - * <pre> - * + r_point - * | - * | dist - * | - * +----depth----+orig <-- dir - * - * tangent = dist/depth - * </pre> - */ -static float calc_tangent_sq(BVHNearestRayData *data, BVHNode *node) -{ - float depth_sq; - const float dist_sq = dist_squared_ray_to_aabb_scaled_v3__impl( - data, node->bv, &depth_sq, data->pick_smallest); - - return (dist_sq != 0.0f) ? 
(dist_sq / depth_sq) : 0.0f; -} - -static float calc_dist_sq_to_ray(BVHNearestRayData *data, BVHNode *node) -{ - return dist_squared_ray_to_aabb_scaled_v3__impl( - data, node->bv, NULL, - data->pick_smallest); -} - -static void dfs_find_lowest_tangent_dfs(BVHNearestRayData *data, BVHNode *node) -{ - if (node->totnode == 0) { - if (data->callback) { - data->callback(data->userdata, data->ray.origin, data->ray.direction, - data->scale, node->index, &data->nearest); - } - else { - data->nearest.index = node->index; - data->nearest.dist_sq = calc_tangent_sq(data, node); - /* TODO: return a value to the data->nearest.co - * not urgent however since users currently define own callbacks */ - } - } - else { - int i; - /* First pick the closest node to dive on */ - if (data->pick_smallest[node->main_axis]) { - for (i = 0; i != node->totnode; i++) { - if (calc_tangent_sq(data, node->children[i]) < data->nearest.dist_sq) { - dfs_find_lowest_tangent_dfs(data, node->children[i]); - } - } - } - else { - for (i = node->totnode - 1; i >= 0; i--) { - if (calc_tangent_sq(data, node->children[i]) < data->nearest.dist_sq) { - dfs_find_lowest_tangent_dfs(data, node->children[i]); - } - } - } - } -} - -static void dfs_find_nearest_to_ray_dfs(BVHNearestRayData *data, BVHNode *node) -{ - if (node->totnode == 0) { - if (data->callback) { - data->callback(data->userdata, data->ray.origin, data->ray.direction, - data->scale, node->index, &data->nearest); - } - else { - data->nearest.index = node->index; - data->nearest.dist_sq = calc_dist_sq_to_ray(data, node); - /* TODO: return a value to the data->nearest.co - * not urgent however since users currently define own callbacks */ - } - } - else { - int i; - /* First pick the closest node to dive on */ - if (data->pick_smallest[node->main_axis]) { - for (i = 0; i != node->totnode; i++) { - if (calc_dist_sq_to_ray(data, node->children[i]) < data->nearest.dist_sq) { - dfs_find_nearest_to_ray_dfs(data, node->children[i]); - } - } - } - else { - 
for (i = node->totnode - 1; i >= 0; i--) { - if (calc_dist_sq_to_ray(data, node->children[i]) < data->nearest.dist_sq) { - dfs_find_nearest_to_ray_dfs(data, node->children[i]); - } - } - } - } -} - -/** - * Returns the point whose tangent defined by the angle between the point and ray is the lowest - * nearest.dist_sq returns the angle's tangent - */ -int BLI_bvhtree_find_nearest_to_ray_angle( - BVHTree *tree, const float co[3], const float dir[3], - const bool ray_is_normalized, const float scale[3], - BVHTreeNearest *nearest, - BVHTree_NearestToRayCallback callback, void *userdata) -{ - BVHNearestRayData data; - BVHNode *root = tree->nodes[tree->totleaf]; - - data.tree = tree; - - data.callback = callback; - data.userdata = userdata; - - dist_squared_ray_to_aabb_scaled_v3_precalc(&data, co, dir, ray_is_normalized, scale); - - if (nearest) { - memcpy(&data.nearest, nearest, sizeof(*nearest)); - } - else { - data.nearest.index = -1; - data.nearest.dist_sq = FLT_MAX; - } - - /* dfs search */ - if (root) { - if (calc_tangent_sq(&data, root) < data.nearest.dist_sq) - dfs_find_lowest_tangent_dfs(&data, root); - } - - /* copy back results */ - if (nearest) { - memcpy(nearest, &data.nearest, sizeof(*nearest)); - } - - return data.nearest.index; -} - -/* return the nearest point to ray */ -int BLI_bvhtree_find_nearest_to_ray( - BVHTree *tree, const float co[3], const float dir[3], - const bool ray_is_normalized, const float scale[3], - BVHTreeNearest *nearest, - BVHTree_NearestToRayCallback callback, void *userdata) -{ - BVHNearestRayData data; - BVHNode *root = tree->nodes[tree->totleaf]; - - data.tree = tree; - - data.callback = callback; - data.userdata = userdata; - - dist_squared_ray_to_aabb_scaled_v3_precalc(&data, co, dir, ray_is_normalized, scale); - - if (nearest) { - memcpy(&data.nearest, nearest, sizeof(*nearest)); - } - else { - data.nearest.index = -1; - data.nearest.dist_sq = FLT_MAX; - } - - /* dfs search */ - if (root) { - if (calc_dist_sq_to_ray(&data, 
root) < data.nearest.dist_sq) { - dfs_find_nearest_to_ray_dfs(&data, root); - } - } - - /* copy back results */ - if (nearest) { - memcpy(nearest, &data.nearest, sizeof(*nearest)); - } - - return data.nearest.index; -} - -/** \} */ - - -/* -------------------------------------------------------------------- */ - /** \name BLI_bvhtree_range_query * * Allocs and fills an array with the indexs of node that are on the given spherical range (center, radius). diff --git a/source/blender/blenlib/intern/array_store.c b/source/blender/blenlib/intern/array_store.c index 21ddddad32e..295b39c1a2f 100644 --- a/source/blender/blenlib/intern/array_store.c +++ b/source/blender/blenlib/intern/array_store.c @@ -217,15 +217,12 @@ /** \name Internal Structs * \{ */ -typedef unsigned int uint; -typedef unsigned char ubyte; - typedef uint64_t hash_key; typedef struct BArrayInfo { size_t chunk_stride; - uint chunk_count; + // uint chunk_count; /* UNUSED (other values are derived from this) */ /* pre-calculated */ size_t chunk_byte_size; @@ -291,7 +288,7 @@ typedef struct BChunkList { /* a chunk of an array */ typedef struct BChunk { - const ubyte *data; + const uchar *data; size_t data_len; /** number of #BChunkList using this. 
*/ int users; @@ -332,7 +329,7 @@ static size_t bchunk_list_size(const BChunkList *chunk_list); * \{ */ static BChunk *bchunk_new( - BArrayMemory *bs_mem, const ubyte *data, const size_t data_len) + BArrayMemory *bs_mem, const uchar *data, const size_t data_len) { BChunk *chunk = BLI_mempool_alloc(bs_mem->chunk); chunk->data = data; @@ -345,9 +342,9 @@ static BChunk *bchunk_new( } static BChunk *bchunk_new_copydata( - BArrayMemory *bs_mem, const ubyte *data, const size_t data_len) + BArrayMemory *bs_mem, const uchar *data, const size_t data_len) { - ubyte *data_copy = MEM_mallocN(data_len, __func__); + uchar *data_copy = MEM_mallocN(data_len, __func__); memcpy(data_copy, data, data_len); return bchunk_new(bs_mem, data_copy, data_len); } @@ -367,7 +364,7 @@ static void bchunk_decref( static bool bchunk_data_compare( const BChunk *chunk, - const ubyte *data_base, const size_t data_base_len, + const uchar *data_base, const size_t data_base_len, const size_t offset) { if (offset + (size_t)chunk->data_len <= data_base_len) { @@ -426,14 +423,14 @@ static void bchunk_list_decref( #ifdef USE_VALIDATE_LIST_DATA_PARTIAL static size_t bchunk_list_data_check( - const BChunkList *chunk_list, const ubyte *data) + const BChunkList *chunk_list, const uchar *data) { - size_t total_size = 0; + size_t offset = 0; for (BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) { - if (memcmp(&data[total_size], cref->link->data, cref->link->data_len) != 0) { + if (memcmp(&data[offset], cref->link->data, cref->link->data_len) != 0) { return false; } - total_size += cref->link->data_len; + offset += cref->link->data_len; } return true; } @@ -466,7 +463,7 @@ static void bchunk_list_ensure_min_size_last( chunk_list->chunk_refs.last = cref->prev; chunk_list->chunk_refs_len -= 1; - ubyte *data_merge = MEM_mallocN(data_merge_len, __func__); + uchar *data_merge = MEM_mallocN(data_merge_len, __func__); memcpy(data_merge, chunk_prev->data, chunk_prev->data_len); 
memcpy(&data_merge[chunk_prev->data_len], chunk_curr->data, chunk_curr->data_len); @@ -487,8 +484,8 @@ static void bchunk_list_ensure_min_size_last( /* merge and split */ const size_t data_prev_len = split; const size_t data_curr_len = data_merge_len - split; - ubyte *data_prev = MEM_mallocN(data_prev_len, __func__); - ubyte *data_curr = MEM_mallocN(data_curr_len, __func__); + uchar *data_prev = MEM_mallocN(data_prev_len, __func__); + uchar *data_curr = MEM_mallocN(data_curr_len, __func__); if (data_prev_len <= chunk_prev->data_len) { const size_t data_curr_shrink_len = chunk_prev->data_len - data_prev_len; @@ -597,11 +594,10 @@ static void bchunk_list_append_only( static void bchunk_list_append_data( const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, - const ubyte *data, const size_t data_len) + const uchar *data, const size_t data_len) { BLI_assert(data_len != 0); - // printf("data_len: %d\n", data_len); #ifdef USE_MERGE_CHUNKS BLI_assert(data_len <= info->chunk_byte_size_max); @@ -613,13 +609,13 @@ static void bchunk_list_append_data( const size_t data_merge_len = chunk_prev->data_len + data_len; /* realloc for single user */ if (cref->link->users == 1) { - ubyte *data_merge = MEM_reallocN((void *)cref->link->data, data_merge_len); + uchar *data_merge = MEM_reallocN((void *)cref->link->data, data_merge_len); memcpy(&data_merge[chunk_prev->data_len], data, data_len); cref->link->data = data_merge; cref->link->data_len = data_merge_len; } else { - ubyte *data_merge = MEM_mallocN(data_merge_len, __func__); + uchar *data_merge = MEM_mallocN(data_merge_len, __func__); memcpy(data_merge, chunk_prev->data, chunk_prev->data_len); memcpy(&data_merge[chunk_prev->data_len], data, data_len); cref->link = bchunk_new(bs_mem, data_merge, data_merge_len); @@ -639,7 +635,7 @@ static void bchunk_list_append_data( /* don't run this, instead preemptively avoid creating a chunk only to merge it (above). 
*/ #if 0 #ifdef USE_MERGE_CHUNKS - bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list, chunk_size_min); + bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list); #endif #endif } @@ -654,7 +650,7 @@ static void bchunk_list_append_data( static void bchunk_list_append_data_n( const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, - const ubyte *data, size_t data_len) + const uchar *data, size_t data_len) { size_t data_trim_len, data_last_chunk_len; bchunk_list_calc_trim_len(info, data_len, &data_trim_len, &data_last_chunk_len); @@ -714,7 +710,7 @@ static void bchunk_list_append( static void bchunk_list_fill_from_array( const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, - const ubyte *data, + const uchar *data, const size_t data_len) { BLI_assert(BLI_listbase_is_empty(&chunk_list->chunk_refs)); @@ -765,13 +761,13 @@ static void bchunk_list_fill_from_array( #define HASH_INIT (5381) -BLI_INLINE uint hash_data_single(const ubyte p) +BLI_INLINE uint hash_data_single(const uchar p) { return (HASH_INIT << 5) + HASH_INIT + (unsigned int)p; } /* hash bytes, from BLI_ghashutil_strhash_n */ -static uint hash_data(const ubyte *key, size_t n) +static uint hash_data(const uchar *key, size_t n) { const signed char *p; unsigned int h = HASH_INIT; @@ -788,7 +784,7 @@ static uint hash_data(const ubyte *key, size_t n) #ifdef USE_HASH_TABLE_ACCUMULATE static void hash_array_from_data( - const BArrayInfo *info, const ubyte *data_slice, const size_t data_slice_len, + const BArrayInfo *info, const uchar *data_slice, const size_t data_slice_len, hash_key *hash_array) { if (info->chunk_stride != 1) { @@ -877,7 +873,7 @@ static void hash_accum_single(hash_key *hash_array, const size_t hash_array_len, static hash_key key_from_chunk_ref( const BArrayInfo *info, const BChunkRef *cref, - /* avoid reallicating each time */ + /* avoid reallocating each time */ hash_key *hash_store, const size_t hash_store_len) { /* in C, will fill in a reusable array */ 
@@ -899,7 +895,7 @@ static hash_key key_from_chunk_ref( key = hash_store[0]; /* cache the key */ - if (key == HASH_TABLE_KEY_UNSET) { + if (UNLIKELY(key == HASH_TABLE_KEY_UNSET)) { key = HASH_TABLE_KEY_FALLBACK; } chunk->key = key; @@ -929,12 +925,12 @@ static hash_key key_from_chunk_ref( static const BChunkRef *table_lookup( const BArrayInfo *info, BTableRef **table, const size_t table_len, const size_t i_table_start, - const ubyte *data, const size_t data_len, const size_t offset, const hash_key *table_hash_array) + const uchar *data, const size_t data_len, const size_t offset, const hash_key *table_hash_array) { size_t size_left = data_len - offset; hash_key key = table_hash_array[((offset - i_table_start) / info->chunk_stride)]; size_t key_index = (size_t)(key % (hash_key)table_len); - for (BTableRef *tref = table[key_index]; tref; tref = tref->next) { + for (const BTableRef *tref = table[key_index]; tref; tref = tref->next) { const BChunkRef *cref = tref->cref; #ifdef USE_HASH_TABLE_KEY_CACHE if (cref->link->key == key) @@ -985,7 +981,7 @@ static hash_key key_from_chunk_ref(const BArrayInfo *info, const BChunkRef *cref static const BChunkRef *table_lookup( const BArrayInfo *info, BTableRef **table, const size_t table_len, const uint UNUSED(i_table_start), - const ubyte *data, const size_t data_len, const size_t offset, const hash_key *UNUSED(table_hash_array)) + const uchar *data, const size_t data_len, const size_t offset, const hash_key *UNUSED(table_hash_array)) { const size_t data_hash_len = BCHUNK_HASH_LEN * info->chunk_stride; /* TODO, cache */ @@ -1025,7 +1021,7 @@ static const BChunkRef *table_lookup( */ static BChunkList *bchunk_list_from_data_merge( const BArrayInfo *info, BArrayMemory *bs_mem, - const ubyte *data, const size_t data_len_original, + const uchar *data, const size_t data_len_original, const BChunkList *chunk_list_reference) { ASSERT_CHUNKLIST_SIZE(chunk_list_reference, chunk_list_reference->total_size); @@ -1042,10 +1038,8 @@ static 
BChunkList *bchunk_list_from_data_merge( size_t i_prev = 0; #ifdef USE_FASTPATH_CHUNKS_FIRST - bool full_match = false; - { - full_match = true; + bool full_match = true; const BChunkRef *cref = chunk_list_reference->chunk_refs.first; while (i_prev < data_len_original) { @@ -1433,7 +1427,7 @@ BArrayStore *BLI_array_store_create( BArrayStore *bs = MEM_callocN(sizeof(BArrayStore), __func__); bs->info.chunk_stride = stride; - bs->info.chunk_count = chunk_count; + // bs->info.chunk_count = chunk_count; bs->info.chunk_byte_size = chunk_count * stride; #ifdef USE_MERGE_CHUNKS @@ -1579,7 +1573,7 @@ BArrayState *BLI_array_store_state_add( if (state_reference) { chunk_list = bchunk_list_from_data_merge( &bs->info, &bs->memory, - (const ubyte *)data, data_len, + (const uchar *)data, data_len, /* re-use reference chunks */ state_reference->chunk_list); } @@ -1588,7 +1582,7 @@ BArrayState *BLI_array_store_state_add( bchunk_list_fill_from_array( &bs->info, &bs->memory, chunk_list, - (const ubyte *)data, data_len); + (const uchar *)data, data_len); } chunk_list->users += 1; @@ -1655,7 +1649,7 @@ void BLI_array_store_state_data_get( BLI_assert(data_test_len == state->chunk_list->total_size); #endif - ubyte *data_step = (ubyte *)data; + uchar *data_step = (uchar *)data; for (BChunkRef *cref = state->chunk_list->chunk_refs.first; cref; cref = cref->next) { BLI_assert(cref->link->users > 0); memcpy(data_step, cref->link->data, cref->link->data_len); diff --git a/source/blender/blenlib/intern/math_geom.c b/source/blender/blenlib/intern/math_geom.c index aeb6a550cd9..3cf26ccf904 100644 --- a/source/blender/blenlib/intern/math_geom.c +++ b/source/blender/blenlib/intern/math_geom.c @@ -2337,224 +2337,6 @@ bool isect_ray_aabb_v3_simple( } } -void dist_squared_ray_to_aabb_v3_precalc( - struct NearestRayToAABB_Precalc *data, - const float ray_origin[3], const float ray_direction[3]) -{ - float dir_sq[3]; - - for (int i = 0; i < 3; i++) { - data->ray_origin[i] = ray_origin[i]; - 
data->ray_direction[i] = ray_direction[i]; - data->ray_inv_dir[i] = (data->ray_direction[i] != 0.0f) ? (1.0f / data->ray_direction[i]) : FLT_MAX; - /* It has to be a function of `ray_inv_dir`, - * since the division of 1 by 0.0f, can be -inf or +inf */ - data->sign[i] = (data->ray_inv_dir[i] < 0.0f); - - dir_sq[i] = SQUARE(data->ray_direction[i]); - } - - /* `diag_sq` Length square of each face diagonal */ - float diag_sq[3] = { - dir_sq[1] + dir_sq[2], - dir_sq[0] + dir_sq[2], - dir_sq[0] + dir_sq[1], - }; - data->idiag_sq[0] = (diag_sq[0] > FLT_EPSILON) ? (1.0f / diag_sq[0]) : FLT_MAX; - data->idiag_sq[1] = (diag_sq[1] > FLT_EPSILON) ? (1.0f / diag_sq[1]) : FLT_MAX; - data->idiag_sq[2] = (diag_sq[2] > FLT_EPSILON) ? (1.0f / diag_sq[2]) : FLT_MAX; - - data->cdot_axis[0] = data->ray_direction[0] * data->idiag_sq[0]; - data->cdot_axis[1] = data->ray_direction[1] * data->idiag_sq[1]; - data->cdot_axis[2] = data->ray_direction[2] * data->idiag_sq[2]; -} - -/** - * Returns the squared distance from a ray to a bound-box `AABB`. 
- * It is based on `fast_ray_nearest_hit` solution to obtain - * the coordinates of the nearest edge of Bound Box to the ray - */ -float dist_squared_ray_to_aabb_v3( - const struct NearestRayToAABB_Precalc *data, - const float bb_min[3], const float bb_max[3], - bool r_axis_closest[3]) -{ - /* `tmin` is a vector that has the smaller distances to each of the - * infinite planes of the `AABB` faces (hit in nearest face X plane, - * nearest face Y plane and nearest face Z plane) */ - float local_bvmin[3], local_bvmax[3]; - - if (data->sign[0] == 0) { - local_bvmin[0] = bb_min[0] - data->ray_origin[0]; - local_bvmax[0] = bb_max[0] - data->ray_origin[0]; - } - else { - local_bvmin[0] = bb_max[0] - data->ray_origin[0]; - local_bvmax[0] = bb_min[0] - data->ray_origin[0]; - } - - if (data->sign[1] == 0) { - local_bvmin[1] = bb_min[1] - data->ray_origin[1]; - local_bvmax[1] = bb_max[1] - data->ray_origin[1]; - } - else { - local_bvmin[1] = bb_max[1] - data->ray_origin[1]; - local_bvmax[1] = bb_min[1] - data->ray_origin[1]; - } - - if (data->sign[2] == 0) { - local_bvmin[2] = bb_min[2] - data->ray_origin[2]; - local_bvmax[2] = bb_max[2] - data->ray_origin[2]; - } - else { - local_bvmin[2] = bb_max[2] - data->ray_origin[2]; - local_bvmax[2] = bb_min[2] - data->ray_origin[2]; - } - - const float tmin[3] = { - local_bvmin[0] * data->ray_inv_dir[0], - local_bvmin[1] * data->ray_inv_dir[1], - local_bvmin[2] * data->ray_inv_dir[2], - }; - - /* `tmax` is a vector that has the longer distances to each of the - * infinite planes of the `AABB` faces (hit in farthest face X plane, - * farthest face Y plane and farthest face Z plane) */ - const float tmax[3] = { - local_bvmax[0] * data->ray_inv_dir[0], - local_bvmax[1] * data->ray_inv_dir[1], - local_bvmax[2] * data->ray_inv_dir[2], - }; - /* `v1` and `v3` is be the coordinates of the nearest `AABB` edge to the ray*/ - float v1[3], v2[3]; - /* `rtmin` is the highest value of the smaller distances. 
== max_axis_v3(tmin) - * `rtmax` is the lowest value of longer distances. == min_axis_v3(tmax)*/ - float rtmin, rtmax, mul, rdist; - /* `main_axis` is the axis equivalent to edge close to the ray */ - int main_axis; - - r_axis_closest[0] = false; - r_axis_closest[1] = false; - r_axis_closest[2] = false; - - /* *** min_axis_v3(tmax) *** */ - if ((tmax[0] <= tmax[1]) && (tmax[0] <= tmax[2])) { - // printf("# Hit in X %s\n", data->sign[0] ? "min", "max"); - rtmax = tmax[0]; - v1[0] = v2[0] = local_bvmax[0]; - mul = local_bvmax[0] * data->ray_direction[0]; - main_axis = 3; - r_axis_closest[0] = data->sign[0]; - } - else if ((tmax[1] <= tmax[0]) && (tmax[1] <= tmax[2])) { - // printf("# Hit in Y %s\n", data->sign[1] ? "min", "max"); - rtmax = tmax[1]; - v1[1] = v2[1] = local_bvmax[1]; - mul = local_bvmax[1] * data->ray_direction[1]; - main_axis = 2; - r_axis_closest[1] = data->sign[1]; - } - else { - // printf("# Hit in Z %s\n", data->sign[2] ? "min", "max"); - rtmax = tmax[2]; - v1[2] = v2[2] = local_bvmax[2]; - mul = local_bvmax[2] * data->ray_direction[2]; - main_axis = 1; - r_axis_closest[2] = data->sign[2]; - } - - /* *** max_axis_v3(tmin) *** */ - if ((tmin[0] >= tmin[1]) && (tmin[0] >= tmin[2])) { - // printf("# To X %s\n", data->sign[0] ? "max", "min"); - rtmin = tmin[0]; - v1[0] = v2[0] = local_bvmin[0]; - mul += local_bvmin[0] * data->ray_direction[0]; - main_axis -= 3; - r_axis_closest[0] = !data->sign[0]; - } - else if ((tmin[1] >= tmin[0]) && (tmin[1] >= tmin[2])) { - // printf("# To Y %s\n", data->sign[1] ? "max", "min"); - rtmin = tmin[1]; - v1[1] = v2[1] = local_bvmin[1]; - mul += local_bvmin[1] * data->ray_direction[1]; - main_axis -= 1; - r_axis_closest[1] = !data->sign[1]; - } - else { - // printf("# To Z %s\n", data->sign[2] ? 
"max", "min"); - rtmin = tmin[2]; - v1[2] = v2[2] = local_bvmin[2]; - mul += local_bvmin[2] * data->ray_direction[2]; - main_axis -= 2; - r_axis_closest[2] = !data->sign[2]; - } - /* *** end min/max axis *** */ - - - /* `if rtmax < 0`, the whole `AABB` is behing us */ - if ((rtmax < 0.0f) && (rtmin < 0.0f)) { - return FLT_MAX; - } - - if (main_axis < 0) { - main_axis += 3; - } - - if (data->sign[main_axis] == 0) { - v1[main_axis] = local_bvmin[main_axis]; - v2[main_axis] = local_bvmax[main_axis]; - } - else { - v1[main_axis] = local_bvmax[main_axis]; - v2[main_axis] = local_bvmin[main_axis]; - } - - /* if rtmin < rtmax, ray intersect `AABB` */ - if (rtmin <= rtmax) { - const float proj = rtmin * data->ray_direction[main_axis]; - rdist = 0.0f; - r_axis_closest[main_axis] = (proj - v1[main_axis]) < (v2[main_axis] - proj); - } - else { - /* `proj` equals to nearest point on the ray closest to the edge `v1 v2` of the `AABB`. */ - const float proj = mul * data->cdot_axis[main_axis]; - float depth; - if (v1[main_axis] > proj) { /* the nearest point to the ray is the point v1 */ - /* `depth` is equivalent the distance from the origin to the point v1, - * Here's a faster way to calculate the dot product of v1 and ray - * (depth = dot_v3v3(v1, data->ray.direction))*/ - depth = mul + data->ray_direction[main_axis] * v1[main_axis]; - rdist = len_squared_v3(v1) - SQUARE(depth); - r_axis_closest[main_axis] = true; - } - else if (v2[main_axis] < proj) { /* the nearest point of the ray is the point v2 */ - depth = mul + data->ray_direction[main_axis] * v2[main_axis]; - rdist = len_squared_v3(v2) - SQUARE(depth); - r_axis_closest[main_axis] = false; - } - else { /* the nearest point of the ray is on the edge of the `AABB`. 
*/ - float v[2]; - mul *= data->idiag_sq[main_axis]; - if (main_axis == 0) { - v[0] = (mul * data->ray_direction[1]) - v1[1]; - v[1] = (mul * data->ray_direction[2]) - v1[2]; - } - else if (main_axis == 1) { - v[0] = (mul * data->ray_direction[0]) - v1[0]; - v[1] = (mul * data->ray_direction[2]) - v1[2]; - } - else { - v[0] = (mul * data->ray_direction[0]) - v1[0]; - v[1] = (mul * data->ray_direction[1]) - v1[1]; - } - rdist = len_squared_v2(v); - r_axis_closest[main_axis] = (proj - v1[main_axis]) < (v2[main_axis] - proj); - } - } - - return rdist; -} - /* find closest point to p on line through (l1, l2) and return lambda, * where (0 <= lambda <= 1) when cp is in the line segment (l1, l2) */ diff --git a/source/blender/blenlib/intern/polyfill2d.c b/source/blender/blenlib/intern/polyfill2d.c index 8d9881e4539..2969b0eccf4 100644 --- a/source/blender/blenlib/intern/polyfill2d.c +++ b/source/blender/blenlib/intern/polyfill2d.c @@ -21,8 +21,15 @@ /** \file blender/blenlib/intern/polyfill2d.c * \ingroup bli * - * A simple implementation of the ear cutting algorithm - * to triangulate simple polygons without holes. + * An ear clipping algorithm to triangulate single boundary polygons. + * + * Details: + * + * - The algorithm guarantees all triangles are assigned (number of coords - 2) + * and that triangles will have non-overlapping indices (even for degenerate geometry). + * - Self-intersections are considered degenerate (resulting triangles will overlap). + * - While multiple polygons aren't supported, holes can still be defined using *key-holes* + * (where the polygon doubles back on its self with *exactly* matching coordinates). * * \note * @@ -74,6 +81,12 @@ typedef signed char eSign; #ifdef USE_KDTREE /** + * Spatial optimization for point-in-triangle intersection checks. 
+ * The simple version of this algorithm is ``O(n^2)`` complexity + * (every point needing to check the triangle defined by every other point), + * Using a binary-tree reduces the complexity to ``O(n log n)`` + * plus some overhead of creating the tree. + * * This is a single purpose KDTree based on BLI_kdtree with some modifications * to better suit polyfill2d. * diff --git a/source/blender/blenlib/intern/rct.c b/source/blender/blenlib/intern/rct.c index ac73a981b45..fd24a00156d 100644 --- a/source/blender/blenlib/intern/rct.c +++ b/source/blender/blenlib/intern/rct.c @@ -351,6 +351,22 @@ void BLI_rcti_init(rcti *rect, int xmin, int xmax, int ymin, int ymax) } } +void BLI_rctf_init_pt_radius(rctf *rect, const float xy[2], float size) +{ + rect->xmin = xy[0] - size; + rect->xmax = xy[0] + size; + rect->ymin = xy[1] - size; + rect->ymax = xy[1] + size; +} + +void BLI_rcti_init_pt_radius(rcti *rect, const int xy[2], int size) +{ + rect->xmin = xy[0] - size; + rect->xmax = xy[0] + size; + rect->ymin = xy[1] - size; + rect->ymax = xy[1] + size; +} + void BLI_rcti_init_minmax(rcti *rect) { rect->xmin = rect->ymin = INT_MAX; diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index fc2d9674c2f..49d2ee83a66 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -48,6 +48,32 @@ */ #define MEMPOOL_SIZE 256 +/* Number of tasks which are pushed directly to local thread queue. + * + * This allows thread to fetch next task without locking the whole queue. 
+ */ +#define LOCALQUEUE_SIZE 1 + +#ifndef NDEBUG +# define ASSERT_THREAD_ID(scheduler, thread_id) \ + do { \ + if (!BLI_thread_is_main()) { \ + TaskThread *thread = pthread_getspecific(scheduler->tls_id_key); \ + if (thread == NULL) { \ + BLI_assert(thread_id == 0); \ + } \ + else { \ + BLI_assert(thread_id == thread->id); \ + } \ + } \ + else { \ + BLI_assert(thread_id == 0); \ + } \ + } while (false) +#else +# define ASSERT_THREAD_ID(scheduler, thread_id) +#endif + typedef struct Task { struct Task *next, *prev; @@ -102,13 +128,16 @@ typedef struct TaskMemPoolStats { } TaskMemPoolStats; #endif +typedef struct TaskThreadLocalStorage { + TaskMemPool task_mempool; + int num_local_queue; + Task *local_queue[LOCALQUEUE_SIZE]; +} TaskThreadLocalStorage; + struct TaskPool { TaskScheduler *scheduler; volatile size_t num; - volatile size_t done; - size_t num_threads; - size_t currently_running_tasks; ThreadMutex num_mutex; ThreadCondition num_cond; @@ -116,6 +145,11 @@ struct TaskPool { ThreadMutex user_mutex; volatile bool do_cancel; + volatile bool do_work; + + volatile bool is_suspended; + ListBase suspended_queue; + size_t num_suspended; /* If set, this pool may never be work_and_wait'ed, which means TaskScheduler * has to use its special background fallback thread in case we are in @@ -123,16 +157,10 @@ struct TaskPool { */ bool run_in_background; - /* This pool is used for caching task pointers for thread id 0. - * This could either point to a global scheduler's task_mempool[0] if the - * pool is handled form the main thread or point to task_mempool_local - * otherwise. - * - * This way we solve possible threading conflicts accessing same global - * memory pool from multiple threads from which wait_work() is called. + /* This is a task scheduler's ID of a thread at which pool was constructed. + * It will be used to access task TLS. 
*/ - TaskMemPool *task_mempool; - TaskMemPool task_mempool_local; + int thread_id; #ifdef DEBUG_STATS TaskMemPoolStats *mempool_stats; @@ -142,7 +170,6 @@ struct TaskPool { struct TaskScheduler { pthread_t *threads; struct TaskThread *task_threads; - TaskMemPool *task_mempool; int num_threads; bool background_thread_only; @@ -151,15 +178,19 @@ struct TaskScheduler { ThreadCondition queue_cond; volatile bool do_exit; + + /* NOTE: In pthread's TLS we store the whole TaskThread structure. */ + pthread_key_t tls_id_key; }; typedef struct TaskThread { TaskScheduler *scheduler; int id; + TaskThreadLocalStorage tls; } TaskThread; /* Helper */ -static void task_data_free(Task *task, const int thread_id) +BLI_INLINE void task_data_free(Task *task, const int thread_id) { if (task->free_taskdata) { if (task->freedata) { @@ -171,28 +202,42 @@ static void task_data_free(Task *task, const int thread_id) } } -BLI_INLINE TaskMemPool *get_task_mempool(TaskPool *pool, const int thread_id) +BLI_INLINE TaskThreadLocalStorage *get_task_tls(TaskPool *pool, + const int thread_id) { + TaskScheduler *scheduler = pool->scheduler; + BLI_assert(thread_id >= 0); + BLI_assert(thread_id <= scheduler->num_threads); if (thread_id == 0) { - return pool->task_mempool; + return &scheduler->task_threads[pool->thread_id].tls; + } + return &scheduler->task_threads[thread_id].tls; +} + +BLI_INLINE void free_task_tls(TaskThreadLocalStorage *tls) +{ + TaskMemPool *task_mempool = &tls->task_mempool; + for (int i = 0; i < task_mempool->num_tasks; ++i) { + MEM_freeN(task_mempool->tasks[i]); } - return &pool->scheduler->task_mempool[thread_id]; } static Task *task_alloc(TaskPool *pool, const int thread_id) { - assert(thread_id <= pool->scheduler->num_threads); + BLI_assert(thread_id <= pool->scheduler->num_threads); if (thread_id != -1) { - assert(thread_id >= 0); - TaskMemPool *mem_pool = get_task_mempool(pool, thread_id); + BLI_assert(thread_id >= 0); + BLI_assert(thread_id <= pool->scheduler->num_threads); 
+ TaskThreadLocalStorage *tls = get_task_tls(pool, thread_id); + TaskMemPool *task_mempool = &tls->task_mempool; /* Try to re-use task memory from a thread local storage. */ - if (mem_pool->num_tasks > 0) { - --mem_pool->num_tasks; + if (task_mempool->num_tasks > 0) { + --task_mempool->num_tasks; /* Success! We've just avoided task allocation. */ #ifdef DEBUG_STATS pool->mempool_stats[thread_id].num_reuse++; #endif - return mem_pool->tasks[mem_pool->num_tasks]; + return task_mempool->tasks[task_mempool->num_tasks]; } /* We are doomed to allocate new task data. */ #ifdef DEBUG_STATS @@ -205,13 +250,14 @@ static Task *task_alloc(TaskPool *pool, const int thread_id) static void task_free(TaskPool *pool, Task *task, const int thread_id) { task_data_free(task, thread_id); - assert(thread_id >= 0); - assert(thread_id <= pool->scheduler->num_threads); - TaskMemPool *mem_pool = get_task_mempool(pool, thread_id); - if (mem_pool->num_tasks < MEMPOOL_SIZE - 1) { + BLI_assert(thread_id >= 0); + BLI_assert(thread_id <= pool->scheduler->num_threads); + TaskThreadLocalStorage *tls = get_task_tls(pool, thread_id); + TaskMemPool *task_mempool = &tls->task_mempool; + if (task_mempool->num_tasks < MEMPOOL_SIZE - 1) { /* Successfully allowed the task to be re-used later. 
*/ - mem_pool->tasks[mem_pool->num_tasks] = task; - ++mem_pool->num_tasks; + task_mempool->tasks[task_mempool->num_tasks] = task; + ++task_mempool->num_tasks; } else { /* Local storage saturated, no other way than just discard @@ -237,8 +283,6 @@ static void task_pool_num_decrease(TaskPool *pool, size_t done) BLI_assert(pool->num >= done); pool->num -= done; - atomic_sub_and_fetch_z(&pool->currently_running_tasks, done); - pool->done += done; if (pool->num == 0) BLI_condition_notify_all(&pool->num_cond); @@ -246,11 +290,11 @@ static void task_pool_num_decrease(TaskPool *pool, size_t done) BLI_mutex_unlock(&pool->num_mutex); } -static void task_pool_num_increase(TaskPool *pool) +static void task_pool_num_increase(TaskPool *pool, size_t new) { BLI_mutex_lock(&pool->num_mutex); - pool->num++; + pool->num += new; BLI_condition_notify_all(&pool->num_cond); BLI_mutex_unlock(&pool->num_mutex); @@ -292,17 +336,10 @@ static bool task_scheduler_thread_wait_pop(TaskScheduler *scheduler, Task **task continue; } - if (atomic_add_and_fetch_z(&pool->currently_running_tasks, 1) <= pool->num_threads || - pool->num_threads == 0) - { - *task = current_task; - found_task = true; - BLI_remlink(&scheduler->queue, *task); - break; - } - else { - atomic_sub_and_fetch_z(&pool->currently_running_tasks, 1); - } + *task = current_task; + found_task = true; + BLI_remlink(&scheduler->queue, *task); + break; } if (!found_task) BLI_condition_wait(&scheduler->queue_cond, &scheduler->queue_mutex); @@ -313,13 +350,34 @@ static bool task_scheduler_thread_wait_pop(TaskScheduler *scheduler, Task **task return true; } +BLI_INLINE void handle_local_queue(TaskThreadLocalStorage *tls, + const int thread_id) +{ + while (tls->num_local_queue > 0) { + /* We pop task from queue before handling it so handler of the task can + * push next job to the local queue. 
+ */ + tls->num_local_queue--; + Task *local_task = tls->local_queue[tls->num_local_queue]; + /* TODO(sergey): Double-check work_and_wait() doesn't handle other's + * pool tasks. + */ + TaskPool *local_pool = local_task->pool; + local_task->run(local_pool, local_task->taskdata, thread_id); + task_free(local_pool, local_task, thread_id); + } +} + static void *task_scheduler_thread_run(void *thread_p) { TaskThread *thread = (TaskThread *) thread_p; + TaskThreadLocalStorage *tls = &thread->tls; TaskScheduler *scheduler = thread->scheduler; int thread_id = thread->id; Task *task; + pthread_setspecific(scheduler->tls_id_key, thread); + /* keep popping off tasks */ while (task_scheduler_thread_wait_pop(scheduler, &task)) { TaskPool *pool = task->pool; @@ -330,6 +388,9 @@ static void *task_scheduler_thread_run(void *thread_p) /* delete task */ task_free(pool, task, thread_id); + /* Handle all tasks from local queue. */ + handle_local_queue(tls, thread_id); + /* notify pool task was done */ task_pool_num_decrease(pool, 1); } @@ -359,20 +420,24 @@ TaskScheduler *BLI_task_scheduler_create(int num_threads) /* Add background-only thread if needed. 
*/ if (num_threads == 0) { - scheduler->background_thread_only = true; - num_threads = 1; + scheduler->background_thread_only = true; + num_threads = 1; } + scheduler->task_threads = MEM_callocN(sizeof(TaskThread) * (num_threads + 1), + "TaskScheduler task threads"); + + pthread_key_create(&scheduler->tls_id_key, NULL); + /* launch threads that will be waiting for work */ if (num_threads > 0) { int i; scheduler->num_threads = num_threads; scheduler->threads = MEM_callocN(sizeof(pthread_t) * num_threads, "TaskScheduler threads"); - scheduler->task_threads = MEM_callocN(sizeof(TaskThread) * num_threads, "TaskScheduler task threads"); for (i = 0; i < num_threads; i++) { - TaskThread *thread = &scheduler->task_threads[i]; + TaskThread *thread = &scheduler->task_threads[i + 1]; thread->scheduler = scheduler; thread->id = i + 1; @@ -380,9 +445,6 @@ TaskScheduler *BLI_task_scheduler_create(int num_threads) fprintf(stderr, "TaskScheduler failed to launch thread %d/%d\n", i, num_threads); } } - - scheduler->task_mempool = MEM_callocN(sizeof(*scheduler->task_mempool) * (num_threads + 1), - "TaskScheduler task_mempool"); } return scheduler; @@ -398,6 +460,8 @@ void BLI_task_scheduler_free(TaskScheduler *scheduler) BLI_condition_notify_all(&scheduler->queue_cond); BLI_mutex_unlock(&scheduler->queue_mutex); + pthread_key_delete(scheduler->tls_id_key); + /* delete threads */ if (scheduler->threads) { int i; @@ -412,17 +476,12 @@ void BLI_task_scheduler_free(TaskScheduler *scheduler) /* Delete task thread data */ if (scheduler->task_threads) { - MEM_freeN(scheduler->task_threads); - } - - /* Delete task memory pool */ - if (scheduler->task_mempool) { - for (int i = 0; i <= scheduler->num_threads; ++i) { - for (int j = 0; j < scheduler->task_mempool[i].num_tasks; ++j) { - MEM_freeN(scheduler->task_mempool[i].tasks[j]); - } + for (int i = 0; i < scheduler->num_threads + 1; ++i) { + TaskThreadLocalStorage *tls = &scheduler->task_threads[i].tls; + free_task_tls(tls); } - 
MEM_freeN(scheduler->task_mempool); + + MEM_freeN(scheduler->task_threads); } /* delete leftover tasks */ @@ -445,7 +504,7 @@ int BLI_task_scheduler_num_threads(TaskScheduler *scheduler) static void task_scheduler_push(TaskScheduler *scheduler, Task *task, TaskPriority priority) { - task_pool_num_increase(task->pool); + task_pool_num_increase(task->pool, 1); /* add task to queue */ BLI_mutex_lock(&scheduler->queue_mutex); @@ -471,7 +530,7 @@ static void task_scheduler_clear(TaskScheduler *scheduler, TaskPool *pool) nexttask = task->next; if (task->pool == pool) { - task_data_free(task, 0); + task_data_free(task, pool->thread_id); BLI_freelinkN(&scheduler->queue, task); done++; @@ -486,7 +545,10 @@ static void task_scheduler_clear(TaskScheduler *scheduler, TaskPool *pool) /* Task Pool */ -static TaskPool *task_pool_create_ex(TaskScheduler *scheduler, void *userdata, const bool is_background) +static TaskPool *task_pool_create_ex(TaskScheduler *scheduler, + void *userdata, + const bool is_background, + const bool is_suspended) { TaskPool *pool = MEM_mallocN(sizeof(TaskPool), "TaskPool"); @@ -504,10 +566,11 @@ static TaskPool *task_pool_create_ex(TaskScheduler *scheduler, void *userdata, c pool->scheduler = scheduler; pool->num = 0; - pool->done = 0; - pool->num_threads = 0; - pool->currently_running_tasks = 0; pool->do_cancel = false; + pool->do_work = false; + pool->is_suspended = is_suspended; + pool->num_suspended = 0; + pool->suspended_queue.first = pool->suspended_queue.last = NULL; pool->run_in_background = is_background; BLI_mutex_init(&pool->num_mutex); @@ -517,11 +580,21 @@ static TaskPool *task_pool_create_ex(TaskScheduler *scheduler, void *userdata, c BLI_mutex_init(&pool->user_mutex); if (BLI_thread_is_main()) { - pool->task_mempool = scheduler->task_mempool; + pool->thread_id = 0; } else { - pool->task_mempool = &pool->task_mempool_local; - pool->task_mempool_local.num_tasks = 0; + TaskThread *thread = pthread_getspecific(scheduler->tls_id_key); + /* 
NOTE: It is possible that pool is created from non-main thread + * which isn't a scheduler thread. In this case pthread's TLS will + * be NULL and we can safely consider thread id 0 for the main + * thread of this pool (the one which does wort_and_wait()). + */ + if (thread == NULL) { + pool->thread_id = 0; + } + else { + pool->thread_id = thread->id; + } } #ifdef DEBUG_STATS @@ -548,7 +621,7 @@ static TaskPool *task_pool_create_ex(TaskScheduler *scheduler, void *userdata, c */ TaskPool *BLI_task_pool_create(TaskScheduler *scheduler, void *userdata) { - return task_pool_create_ex(scheduler, userdata, false); + return task_pool_create_ex(scheduler, userdata, false, false); } /** @@ -563,25 +636,28 @@ TaskPool *BLI_task_pool_create(TaskScheduler *scheduler, void *userdata) */ TaskPool *BLI_task_pool_create_background(TaskScheduler *scheduler, void *userdata) { - return task_pool_create_ex(scheduler, userdata, true); + return task_pool_create_ex(scheduler, userdata, true, false); +} + +/** + * Similar to BLI_task_pool_create() but does not schedule any tasks for execution + * for until BLI_task_pool_work_and_wait() is called. This helps reducing therading + * overhead when pushing huge amount of small initial tasks from the main thread. + */ +TaskPool *BLI_task_pool_create_suspended(TaskScheduler *scheduler, void *userdata) +{ + return task_pool_create_ex(scheduler, userdata, false, true); } void BLI_task_pool_free(TaskPool *pool) { - BLI_task_pool_stop(pool); + BLI_task_pool_cancel(pool); BLI_mutex_end(&pool->num_mutex); BLI_condition_end(&pool->num_cond); BLI_mutex_end(&pool->user_mutex); - /* Free local memory pool, those pointers are lost forever. 
*/ - if (pool->task_mempool == &pool->task_mempool_local) { - for (int i = 0; i < pool->task_mempool_local.num_tasks; i++) { - MEM_freeN(pool->task_mempool_local.tasks[i]); - } - } - #ifdef DEBUG_STATS printf("Thread ID Allocated Reused Discarded\n"); for (int i = 0; i < pool->scheduler->num_threads + 1; ++i) { @@ -612,6 +688,25 @@ static void task_pool_push( task->freedata = freedata; task->pool = pool; + if (pool->is_suspended) { + BLI_addhead(&pool->suspended_queue, task); + atomic_fetch_and_add_z(&pool->num_suspended, 1); + return; + } + + if (thread_id != -1 && + (thread_id != pool->thread_id || pool->do_work)) + { + ASSERT_THREAD_ID(pool->scheduler, thread_id); + + TaskThreadLocalStorage *tls = get_task_tls(pool, thread_id); + if (tls->num_local_queue < LOCALQUEUE_SIZE) { + tls->local_queue[tls->num_local_queue] = task; + tls->num_local_queue++; + return; + } + } + task_scheduler_push(pool->scheduler, task, priority); } @@ -636,8 +731,27 @@ void BLI_task_pool_push_from_thread(TaskPool *pool, TaskRunFunction run, void BLI_task_pool_work_and_wait(TaskPool *pool) { + TaskThreadLocalStorage *tls = get_task_tls(pool, pool->thread_id); TaskScheduler *scheduler = pool->scheduler; + if (atomic_fetch_and_and_uint8((uint8_t*)&pool->is_suspended, 0)) { + if (pool->num_suspended) { + task_pool_num_increase(pool, pool->num_suspended); + BLI_mutex_lock(&scheduler->queue_mutex); + + BLI_movelisttolist(&scheduler->queue, &pool->suspended_queue); + + BLI_condition_notify_all(&scheduler->queue_cond); + BLI_mutex_unlock(&scheduler->queue_mutex); + + } + pool->is_suspended = false; + } + + pool->do_work = true; + + ASSERT_THREAD_ID(pool->scheduler, pool->thread_id); + BLI_mutex_lock(&pool->num_mutex); while (pool->num != 0) { @@ -651,16 +765,12 @@ void BLI_task_pool_work_and_wait(TaskPool *pool) /* find task from this pool. 
if we get a task from another pool, * we can get into deadlock */ - if (pool->num_threads == 0 || - pool->currently_running_tasks < pool->num_threads) - { - for (task = scheduler->queue.first; task; task = task->next) { - if (task->pool == pool) { - work_task = task; - found_task = true; - BLI_remlink(&scheduler->queue, task); - break; - } + for (task = scheduler->queue.first; task; task = task->next) { + if (task->pool == pool) { + work_task = task; + found_task = true; + BLI_remlink(&scheduler->queue, task); + break; } } @@ -669,11 +779,13 @@ void BLI_task_pool_work_and_wait(TaskPool *pool) /* if found task, do it, otherwise wait until other tasks are done */ if (found_task) { /* run task */ - atomic_add_and_fetch_z(&pool->currently_running_tasks, 1); - work_task->run(pool, work_task->taskdata, 0); + work_task->run(pool, work_task->taskdata, pool->thread_id); /* delete task */ - task_free(pool, task, 0); + task_free(pool, task, pool->thread_id); + + /* Handle all tasks from local queue. */ + handle_local_queue(tls, pool->thread_id); /* notify pool task was done */ task_pool_num_decrease(pool, 1); @@ -688,22 +800,8 @@ void BLI_task_pool_work_and_wait(TaskPool *pool) } BLI_mutex_unlock(&pool->num_mutex); -} -int BLI_pool_get_num_threads(TaskPool *pool) -{ - if (pool->num_threads != 0) { - return pool->num_threads; - } - else { - return BLI_task_scheduler_num_threads(pool->scheduler); - } -} - -void BLI_pool_set_num_threads(TaskPool *pool, int num_threads) -{ - /* NOTE: Don't try to modify threads while tasks are running! 
*/ - pool->num_threads = num_threads; + handle_local_queue(tls, pool->thread_id); } void BLI_task_pool_cancel(TaskPool *pool) @@ -721,13 +819,6 @@ void BLI_task_pool_cancel(TaskPool *pool) pool->do_cancel = false; } -void BLI_task_pool_stop(TaskPool *pool) -{ - task_scheduler_clear(pool->scheduler, pool); - - BLI_assert(pool->num == 0); -} - bool BLI_task_pool_canceled(TaskPool *pool) { return pool->do_cancel; @@ -743,11 +834,6 @@ ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool) return &pool->user_mutex; } -size_t BLI_task_pool_tasks_done(TaskPool *pool) -{ - return pool->done; -} - /* Parallel range routines */ /** @@ -918,7 +1004,8 @@ static void task_parallel_range_ex( BLI_task_pool_push_from_thread(task_pool, parallel_range_func, userdata_chunk_local, false, - TASK_PRIORITY_HIGH, 0); + TASK_PRIORITY_HIGH, + task_pool->thread_id); } BLI_task_pool_work_and_wait(task_pool); @@ -1124,7 +1211,8 @@ void BLI_task_parallel_listbase( BLI_task_pool_push_from_thread(task_pool, parallel_listbase_func, NULL, false, - TASK_PRIORITY_HIGH, 0); + TASK_PRIORITY_HIGH, + task_pool->thread_id); } BLI_task_pool_work_and_wait(task_pool); diff --git a/source/blender/blenlib/intern/threads.c b/source/blender/blenlib/intern/threads.c index b60981802aa..77da3be0600 100644 --- a/source/blender/blenlib/intern/threads.c +++ b/source/blender/blenlib/intern/threads.c @@ -54,6 +54,8 @@ # include <sys/time.h> #endif +#include "atomic_ops.h" + #if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__) # define USE_APPLE_OMP_FIX #endif @@ -124,7 +126,7 @@ static pthread_mutex_t _colormanage_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t _fftw_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t _view3d_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_t mainid; -static int thread_levels = 0; /* threads can be invoked inside threads */ +static unsigned int thread_levels = 0; /* threads can be invoked inside threads */ static int 
num_threads_override = 0; /* just a max for security reasons */ @@ -198,9 +200,9 @@ void BLI_init_threads(ListBase *threadbase, void *(*do_thread)(void *), int tot) tslot->avail = 1; } } - - BLI_spin_lock(&_malloc_lock); - if (thread_levels == 0) { + + unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1); + if (level == 0) { MEM_set_lock_callback(BLI_lock_malloc_thread, BLI_unlock_malloc_thread); #ifdef USE_APPLE_OMP_FIX @@ -210,9 +212,6 @@ void BLI_init_threads(ListBase *threadbase, void *(*do_thread)(void *), int tot) thread_tls_data = pthread_getspecific(gomp_tls_key); #endif } - - thread_levels++; - BLI_spin_unlock(&_malloc_lock); } /* amount of available threads */ @@ -331,11 +330,10 @@ void BLI_end_threads(ListBase *threadbase) BLI_freelistN(threadbase); } - BLI_spin_lock(&_malloc_lock); - thread_levels--; - if (thread_levels == 0) + unsigned int level = atomic_sub_and_fetch_u(&thread_levels, 1); + if (level == 0) { MEM_set_lock_callback(NULL, NULL); - BLI_spin_unlock(&_malloc_lock); + } } /* System Information */ @@ -812,26 +810,17 @@ void BLI_thread_queue_wait_finish(ThreadQueue *queue) void BLI_begin_threaded_malloc(void) { - /* Used for debug only */ - /* BLI_assert(thread_levels >= 0); */ - - BLI_spin_lock(&_malloc_lock); - if (thread_levels == 0) { + unsigned int level = atomic_fetch_and_add_u(&thread_levels, 1); + if (level == 0) { MEM_set_lock_callback(BLI_lock_malloc_thread, BLI_unlock_malloc_thread); } - thread_levels++; - BLI_spin_unlock(&_malloc_lock); } void BLI_end_threaded_malloc(void) { - /* Used for debug only */ - /* BLI_assert(thread_levels >= 0); */ - - BLI_spin_lock(&_malloc_lock); - thread_levels--; - if (thread_levels == 0) + unsigned int level = atomic_sub_and_fetch_u(&thread_levels, 1); + if (level == 0) { MEM_set_lock_callback(NULL, NULL); - BLI_spin_unlock(&_malloc_lock); + } } diff --git a/source/blender/blenloader/intern/readfile.c b/source/blender/blenloader/intern/readfile.c index 971f5d54b10..e7d3d4c369a 100644 --- 
a/source/blender/blenloader/intern/readfile.c +++ b/source/blender/blenloader/intern/readfile.c @@ -5317,6 +5317,37 @@ static void direct_link_modifiers(FileData *fd, ListBase *lb) MeshSeqCacheModifierData *msmcd = (MeshSeqCacheModifierData *)md; msmcd->reader = NULL; } + else if (md->type == eModifierType_SurfaceDeform) { + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + + smd->verts = newdataadr(fd, smd->verts); + + if (smd->verts) { + for (int i = 0; i < smd->numverts; i++) { + smd->verts[i].binds = newdataadr(fd, smd->verts[i].binds); + + if (smd->verts[i].binds) { + for (int j = 0; j < smd->verts[i].numbinds; j++) { + smd->verts[i].binds[j].vert_inds = newdataadr(fd, smd->verts[i].binds[j].vert_inds); + smd->verts[i].binds[j].vert_weights = newdataadr(fd, smd->verts[i].binds[j].vert_weights); + + if (fd->flags & FD_FLAGS_SWITCH_ENDIAN) { + if (smd->verts[i].binds[j].vert_inds) + BLI_endian_switch_uint32_array(smd->verts[i].binds[j].vert_inds, smd->verts[i].binds[j].numverts); + + if (smd->verts[i].binds[j].vert_weights) { + if (smd->verts[i].binds[j].mode == MOD_SDEF_MODE_CENTROID || + smd->verts[i].binds[j].mode == MOD_SDEF_MODE_LOOPTRI) + BLI_endian_switch_float_array(smd->verts[i].binds[j].vert_weights, 3); + else + BLI_endian_switch_float_array(smd->verts[i].binds[j].vert_weights, smd->verts[i].binds[j].numverts); + } + } + } + } + } + } + } } } diff --git a/source/blender/blenloader/intern/versioning_270.c b/source/blender/blenloader/intern/versioning_270.c index 610c74148eb..8eb61251ddd 100644 --- a/source/blender/blenloader/intern/versioning_270.c +++ b/source/blender/blenloader/intern/versioning_270.c @@ -1577,6 +1577,41 @@ void blo_do_versions_270(FileData *fd, Library *UNUSED(lib), Main *main) } } + /* Fix for T50736, Glare comp node using same var for two different things. 
*/ + if (!DNA_struct_elem_find(fd->filesdna, "NodeGlare", "char", "star_45")) { + FOREACH_NODETREE(main, ntree, id) { + if (ntree->type == NTREE_COMPOSIT) { + ntreeSetTypes(NULL, ntree); + for (bNode *node = ntree->nodes.first; node; node = node->next) { + if (node->type == CMP_NODE_GLARE) { + NodeGlare *ndg = node->storage; + switch (ndg->type) { + case 2: /* Grrrr! magic numbers :( */ + ndg->streaks = ndg->angle; + break; + case 0: + ndg->star_45 = ndg->angle != 0; + break; + default: + break; + } + } + } + } + } FOREACH_NODETREE_END + } + + if (!DNA_struct_elem_find(fd->filesdna, "SurfaceDeformModifierData", "float", "mat[4][4]")) { + for (Object *ob = main->object.first; ob; ob = ob->id.next) { + for (ModifierData *md = ob->modifiers.first; md; md = md->next) { + if (md->type == eModifierType_SurfaceDeform) { + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + unit_m4(smd->mat); + } + } + } + } + /* initialize regiondata for each SpaceClip, due to the newly brought RegionSpaceClip */ if (!DNA_struct_elem_find(fd->filesdna, "SpaceClip", "MovieClip", "*secondary_clip")) { for (bScreen *screen = main->screen.first; screen != NULL; screen = screen->id.next) { diff --git a/source/blender/blenloader/intern/writefile.c b/source/blender/blenloader/intern/writefile.c index 106943d15dc..eef38c479e8 100644 --- a/source/blender/blenloader/intern/writefile.c +++ b/source/blender/blenloader/intern/writefile.c @@ -78,7 +78,7 @@ * - write #TEST (#RenderInfo struct. 128x128 blend file preview is optional). * - write #GLOB (#FileGlobal struct) (some global vars). * - write #DNA1 (#SDNA struct) - * - write #USER (#UserDef struct) if filename is ``~/X.XX/config/startup.blend``. + * - write #USER (#UserDef struct) if filename is ``~/.config/blender/X.XX/config/startup.blend``. 
*/ @@ -1026,6 +1026,25 @@ static void write_nodetree(WriteData *wd, bNodeTree *ntree) { /* pass */ } + else if ((ntree->type == NTREE_COMPOSIT) && (node->type == CMP_NODE_GLARE)) { + /* Simple forward compat for fix for T50736. + * Not ideal (there is no ideal solution here), but should do for now. */ + NodeGlare *ndg = node->storage; + /* Not in undo case. */ + if (!wd->current) { + switch (ndg->type) { + case 2: /* Grrrr! magic numbers :( */ + ndg->angle = ndg->streaks; + break; + case 0: + ndg->angle = ndg->star_45; + break; + default: + break; + } + } + writestruct_id(wd, DATA, node->typeinfo->storagename, 1, node->storage); + } else { writestruct_id(wd, DATA, node->typeinfo->storagename, 1, node->storage); } @@ -1818,6 +1837,32 @@ static void write_modifiers(WriteData *wd, ListBase *modbase) writedata(wd, DATA, sizeof(float[3]) * csmd->bind_coords_num, csmd->bind_coords); } } + else if (md->type == eModifierType_SurfaceDeform) { + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + + writestruct(wd, DATA, SDefVert, smd->numverts, smd->verts); + + if (smd->verts) { + for (int i = 0; i < smd->numverts; i++) { + writestruct(wd, DATA, SDefBind, smd->verts[i].numbinds, smd->verts[i].binds); + + if (smd->verts[i].binds) { + for (int j = 0; j < smd->verts[i].numbinds; j++) { + writedata(wd, DATA, sizeof(int) * smd->verts[i].binds[j].numverts, smd->verts[i].binds[j].vert_inds); + + if (smd->verts[i].binds[j].mode == MOD_SDEF_MODE_CENTROID || + smd->verts[i].binds[j].mode == MOD_SDEF_MODE_LOOPTRI) + { + writedata(wd, DATA, sizeof(float) * 3, smd->verts[i].binds[j].vert_weights); + } + else { + writedata(wd, DATA, sizeof(float) * smd->verts[i].binds[j].numverts, smd->verts[i].binds[j].vert_weights); + } + } + } + } + } + } } } diff --git a/source/blender/bmesh/operators/bmo_primitive.c b/source/blender/bmesh/operators/bmo_primitive.c index 8408169d85e..723e0b168e0 100644 --- a/source/blender/bmesh/operators/bmo_primitive.c +++ 
b/source/blender/bmesh/operators/bmo_primitive.c @@ -1122,7 +1122,7 @@ static void bm_mesh_calc_uvs_sphere_face(BMFace *f, const int cd_loop_uv_offset) } /* Shift borderline coordinates to the left. */ - if (fabsf(theta - M_PI) < 0.0001f) { + if (fabsf(theta - (float)M_PI) < 0.0001f) { theta = -M_PI; } diff --git a/source/blender/bmesh/tools/bmesh_intersect.c b/source/blender/bmesh/tools/bmesh_intersect.c index 58234ddf3bd..2cb82d0fc02 100644 --- a/source/blender/bmesh/tools/bmesh_intersect.c +++ b/source/blender/bmesh/tools/bmesh_intersect.c @@ -986,7 +986,7 @@ bool BM_mesh_intersect( struct BMLoop *(*looptris)[3], const int looptris_tot, int (*test_fn)(BMFace *f, void *user_data), void *user_data, const bool use_self, const bool use_separate, const bool use_dissolve, const bool use_island_connect, - const int boolean_mode, + const bool use_edge_tag, const int boolean_mode, const float eps) { struct ISectState s; @@ -1526,7 +1526,7 @@ bool BM_mesh_intersect( BM_mesh_edgesplit(bm, false, true, false); } - else if (boolean_mode != BMESH_ISECT_BOOLEAN_NONE) { + else if (boolean_mode != BMESH_ISECT_BOOLEAN_NONE || use_edge_tag) { GSetIterator gs_iter; /* no need to clear for boolean */ diff --git a/source/blender/bmesh/tools/bmesh_intersect.h b/source/blender/bmesh/tools/bmesh_intersect.h index d0cc41654eb..51926a01710 100644 --- a/source/blender/bmesh/tools/bmesh_intersect.h +++ b/source/blender/bmesh/tools/bmesh_intersect.h @@ -30,7 +30,7 @@ bool BM_mesh_intersect( struct BMLoop *(*looptris)[3], const int looptris_tot, int (*test_fn)(BMFace *f, void *user_data), void *user_data, const bool use_self, const bool use_separate, const bool use_dissolve, const bool use_island_connect, - const int boolean_mode, + const bool use_edge_tag, const int boolean_mode, const float eps); enum { diff --git a/source/blender/collada/SkinInfo.cpp b/source/blender/collada/SkinInfo.cpp index 7242a24523c..71875d6274a 100644 --- a/source/blender/collada/SkinInfo.cpp +++ 
b/source/blender/collada/SkinInfo.cpp @@ -230,7 +230,6 @@ void SkinInfo::link_armature(bContext *C, Object *ob, std::map<COLLADAFW::Unique ModifierData *md = ED_object_modifier_add(NULL, bmain, scene, ob, NULL, eModifierType_Armature); ArmatureModifierData *amd = (ArmatureModifierData *)md; amd->object = ob_arm; - struct bArmature *armature = (bArmature *)ob_arm->data; #if 1 bc_set_parent(ob, ob_arm, C); diff --git a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cpp b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cpp index e1ada9a8c39..5f78067220a 100644 --- a/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cpp +++ b/source/blender/compositor/operations/COM_ConvolutionEdgeFilterOperation.cpp @@ -94,4 +94,10 @@ void ConvolutionEdgeFilterOperation::executePixel(float output[4], int x, int y, output[2] = output[2] * value[0] + in2[2] * mval; output[3] = in2[3]; + + /* Make sure we don't return negative color. */ + output[0] = max(output[0], 0.0f); + output[1] = max(output[1], 0.0f); + output[2] = max(output[2], 0.0f); + output[3] = max(output[3], 0.0f); } diff --git a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cpp b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cpp index 68ec2be5ebd..6ac1ff9a1eb 100644 --- a/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cpp +++ b/source/blender/compositor/operations/COM_ConvolutionFilterOperation.cpp @@ -107,6 +107,12 @@ void ConvolutionFilterOperation::executePixel(float output[4], int x, int y, voi output[1] = output[1] * value[0] + in2[1] * mval; output[2] = output[2] * value[0] + in2[2] * mval; output[3] = output[3] * value[0] + in2[3] * mval; + + /* Make sure we don't return negative color. 
*/ + output[0] = max(output[0], 0.0f); + output[1] = max(output[1], 0.0f); + output[2] = max(output[2], 0.0f); + output[3] = max(output[3], 0.0f); } bool ConvolutionFilterOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) diff --git a/source/blender/compositor/operations/COM_GlareSimpleStarOperation.cpp b/source/blender/compositor/operations/COM_GlareSimpleStarOperation.cpp index 957ac5af748..57aa3a1bac2 100644 --- a/source/blender/compositor/operations/COM_GlareSimpleStarOperation.cpp +++ b/source/blender/compositor/operations/COM_GlareSimpleStarOperation.cpp @@ -44,18 +44,18 @@ void GlareSimpleStarOperation::generateGlare(float *data, MemoryBuffer *inputTil xp = x + i; tbuf1->read(c, x, y); mul_v3_fl(c, f1); - tbuf1->read(tc, (settings->angle ? xm : x), ym); + tbuf1->read(tc, (settings->star_45 ? xm : x), ym); madd_v3_v3fl(c, tc, f2); - tbuf1->read(tc, (settings->angle ? xp : x), yp); + tbuf1->read(tc, (settings->star_45 ? xp : x), yp); madd_v3_v3fl(c, tc, f2); c[3] = 1.0f; tbuf1->writePixel(x, y, c); tbuf2->read(c, x, y); mul_v3_fl(c, f1); - tbuf2->read(tc, xm, (settings->angle ? yp : y)); + tbuf2->read(tc, xm, (settings->star_45 ? yp : y)); madd_v3_v3fl(c, tc, f2); - tbuf2->read(tc, xp, (settings->angle ? ym : y)); + tbuf2->read(tc, xp, (settings->star_45 ? ym : y)); madd_v3_v3fl(c, tc, f2); c[3] = 1.0f; tbuf2->writePixel(x, y, c); @@ -73,18 +73,18 @@ void GlareSimpleStarOperation::generateGlare(float *data, MemoryBuffer *inputTil xp = x + i; tbuf1->read(c, x, y); mul_v3_fl(c, f1); - tbuf1->read(tc, (settings->angle ? xm : x), ym); + tbuf1->read(tc, (settings->star_45 ? xm : x), ym); madd_v3_v3fl(c, tc, f2); - tbuf1->read(tc, (settings->angle ? xp : x), yp); + tbuf1->read(tc, (settings->star_45 ? xp : x), yp); madd_v3_v3fl(c, tc, f2); c[3] = 1.0f; tbuf1->writePixel(x, y, c); tbuf2->read(c, x, y); mul_v3_fl(c, f1); - tbuf2->read(tc, xm, (settings->angle ? yp : y)); + tbuf2->read(tc, xm, (settings->star_45 ? 
yp : y)); madd_v3_v3fl(c, tc, f2); - tbuf2->read(tc, xp, (settings->angle ? ym : y)); + tbuf2->read(tc, xp, (settings->star_45 ? ym : y)); madd_v3_v3fl(c, tc, f2); c[3] = 1.0f; tbuf2->writePixel(x, y, c); diff --git a/source/blender/compositor/operations/COM_GlareStreaksOperation.cpp b/source/blender/compositor/operations/COM_GlareStreaksOperation.cpp index da6076337b4..535f2952e5d 100644 --- a/source/blender/compositor/operations/COM_GlareStreaksOperation.cpp +++ b/source/blender/compositor/operations/COM_GlareStreaksOperation.cpp @@ -28,7 +28,7 @@ void GlareStreaksOperation::generateGlare(float *data, MemoryBuffer *inputTile, int x, y, n; unsigned int nump = 0; float c1[4], c2[4], c3[4], c4[4]; - float a, ang = DEG2RADF(360.0f) / (float)settings->angle; + float a, ang = DEG2RADF(360.0f) / (float)settings->streaks; int size = inputTile->getWidth() * inputTile->getHeight(); int size4 = size * 4; diff --git a/source/blender/depsgraph/intern/eval/deg_eval.cc b/source/blender/depsgraph/intern/eval/deg_eval.cc index 3a042535d26..e739bc9dbb5 100644 --- a/source/blender/depsgraph/intern/eval/deg_eval.cc +++ b/source/blender/depsgraph/intern/eval/deg_eval.cc @@ -95,105 +95,38 @@ static void deg_task_run_func(TaskPool *pool, /* Should only be the case for NOOPs, which never get to this point. */ BLI_assert(node->evaluate); - while (true) { - /* Get context. */ - /* TODO: Who initialises this? "Init" operations aren't able to - * initialise it!!! - */ - /* TODO(sergey): We don't use component contexts at this moment. */ - /* ComponentDepsNode *comp = node->owner; */ - BLI_assert(node->owner != NULL); - - /* Since we're not leaving the thread for until the graph branches it is - * possible to have NO-OP on the way. for which evaluate() will be NULL. - * but that's all fine, we'll just scheduler it's children. - */ - if (node->evaluate) { + /* Get context. */ + /* TODO: Who initialises this? "Init" operations aren't able to + * initialise it!!! 
+ */ + /* TODO(sergey): We don't use component contexts at this moment. */ + /* ComponentDepsNode *comp = node->owner; */ + BLI_assert(node->owner != NULL); + + /* Since we're not leaving the thread for until the graph branches it is + * possible to have NO-OP on the way. for which evaluate() will be NULL. + * but that's all fine, we'll just scheduler it's children. + */ + if (node->evaluate) { /* Take note of current time. */ #ifdef USE_DEBUGGER - double start_time = PIL_check_seconds_timer(); - DepsgraphDebug::task_started(state->graph, node); + double start_time = PIL_check_seconds_timer(); + DepsgraphDebug::task_started(state->graph, node); #endif - /* Perform operation. */ - node->evaluate(state->eval_ctx); + /* Perform operation. */ + node->evaluate(state->eval_ctx); /* Note how long this took. */ #ifdef USE_DEBUGGER - double end_time = PIL_check_seconds_timer(); - DepsgraphDebug::task_completed(state->graph, - node, - end_time - start_time); + double end_time = PIL_check_seconds_timer(); + DepsgraphDebug::task_completed(state->graph, + node, + end_time - start_time); #endif - } - - /* If there's only one outgoing link we try to immediately switch to - * that node evaluation, without leaving the thread. - * - * It's only doable if the child don't have extra relations or all they - * are satisfied. - * - * TODO(sergey): Checks here can be de-duplicated with the ones from - * schedule_node(), however, how to do it nicely? - */ - if (node->outlinks.size() == 1) { - DepsRelation *rel = node->outlinks[0]; - OperationDepsNode *child = (OperationDepsNode *)rel->to; - BLI_assert(child->type == DEPSNODE_TYPE_OPERATION); - if (!child->scheduled) { - unsigned int id_layers = child->owner->owner->layers; - if (!((child->flag & DEPSOP_FLAG_NEEDS_UPDATE) != 0 && - (id_layers & state->layers) != 0)) - { - /* Node does not need an update, so can;t continue with the - * chain and need to switch to another one by leaving the - * thread. 
- */ - break; - } - if ((rel->flag & DEPSREL_FLAG_CYCLIC) == 0) { - BLI_assert(child->num_links_pending > 0); - atomic_sub_and_fetch_uint32(&child->num_links_pending, 1); - } - if (child->num_links_pending == 0) { - bool is_scheduled = atomic_fetch_and_or_uint8( - (uint8_t *)&child->scheduled, (uint8_t)true); - if (!is_scheduled) { - /* Node was not scheduled, switch to it! */ - node = child; - } - else { - /* Someone else scheduled the node, leaving us - * unemployed in this thread, we're leaving. - */ - break; - } - } - else { - /* There are other dependencies on the child, can't do - * anything in the current thread. - */ - break; - } - } - else { - /* Happens when having cyclic dependencies. - * - * Nothing to do here, single child was already scheduled, we - * can leave the thread now. - */ - break; - } - } - else { - /* TODO(sergey): It's possible to use one of the outgoing relations - * as a chain which we'll try to keep alive, but it's a bit more - * involved change. - */ - schedule_children(pool, state->graph, node, state->layers, thread_id); - break; - } } + + schedule_children(pool, state->graph, node, state->layers, thread_id); } typedef struct CalculatePengindData { @@ -378,12 +311,19 @@ void deg_evaluate_on_refresh(EvaluationContext *eval_ctx, state.graph = graph; state.layers = layers; - TaskScheduler *task_scheduler = BLI_task_scheduler_get(); - TaskPool *task_pool = BLI_task_pool_create(task_scheduler, &state); + TaskScheduler *task_scheduler; + bool need_free_scheduler; if (G.debug & G_DEBUG_DEPSGRAPH_NO_THREADS) { - BLI_pool_set_num_threads(task_pool, 1); + task_scheduler = BLI_task_scheduler_create(1); + need_free_scheduler = true; } + else { + task_scheduler = BLI_task_scheduler_get(); + need_free_scheduler = false; + } + + TaskPool *task_pool = BLI_task_pool_create_suspended(task_scheduler, &state); calculate_pending_parents(graph, layers); @@ -410,6 +350,10 @@ void deg_evaluate_on_refresh(EvaluationContext *eval_ctx, /* Clear any uncleared 
tags - just in case. */ deg_graph_clear_tags(graph); + + if (need_free_scheduler) { + BLI_task_scheduler_free(task_scheduler); + } } } // namespace DEG diff --git a/source/blender/depsgraph/intern/eval/deg_eval_flush.cc b/source/blender/depsgraph/intern/eval/deg_eval_flush.cc index 7c6c25bef0d..e10f86f6e95 100644 --- a/source/blender/depsgraph/intern/eval/deg_eval_flush.cc +++ b/source/blender/depsgraph/intern/eval/deg_eval_flush.cc @@ -180,6 +180,11 @@ void deg_graph_flush_updates(Main *bmain, Depsgraph *graph) comp_node->done = 1; /* Flush to nodes along links... */ + /* TODO(sergey): This is mainly giving speedup due ot less queue pushes, which + * reduces number of memory allocations. + * + * We should try solve the allocation issue instead of doing crazy things here. + */ if (node->outlinks.size() == 1) { OperationDepsNode *to_node = (OperationDepsNode *)node->outlinks[0]->to; if (to_node->scheduled == false) { diff --git a/source/blender/editors/animation/anim_channels_defines.c b/source/blender/editors/animation/anim_channels_defines.c index 57302c18a88..4d4f8c1298a 100644 --- a/source/blender/editors/animation/anim_channels_defines.c +++ b/source/blender/editors/animation/anim_channels_defines.c @@ -3856,7 +3856,8 @@ void ANIM_channel_draw(bAnimContext *ac, bAnimListElem *ale, float yminc, float if (ac->sl) { if ((ac->spacetype == SPACE_IPO) && (acf->has_setting(ac, ale, ACHANNEL_SETTING_VISIBLE) || - acf->has_setting(ac, ale, ACHANNEL_SETTING_ALWAYS_VISIBLE))) { + acf->has_setting(ac, ale, ACHANNEL_SETTING_ALWAYS_VISIBLE))) + { /* for F-Curves, draw color-preview of curve behind checkbox */ if (ELEM(ale->type, ANIMTYPE_FCURVE, ANIMTYPE_NLACURVE)) { FCurve *fcu = (FCurve *)ale->data; diff --git a/source/blender/editors/animation/anim_draw.c b/source/blender/editors/animation/anim_draw.c index a4ba95420c1..98900812bb2 100644 --- a/source/blender/editors/animation/anim_draw.c +++ b/source/blender/editors/animation/anim_draw.c @@ -117,7 +117,8 @@ void 
ANIM_draw_cfra(const bContext *C, View2D *v2d, short flag) /* Draw a light green line to indicate current frame */ UI_ThemeColor(TH_CFRAME); - const float x = (float)(scene->r.cfra * scene->r.framelen); + const float time = scene->r.cfra + scene->r.subframe; + const float x = (float)(time * scene->r.framelen); glLineWidth((flag & DRAWCFRA_WIDE) ? 3.0 : 2.0); diff --git a/source/blender/editors/animation/anim_ops.c b/source/blender/editors/animation/anim_ops.c index c0d6963acbb..bb73cbf03b4 100644 --- a/source/blender/editors/animation/anim_ops.c +++ b/source/blender/editors/animation/anim_ops.c @@ -95,7 +95,7 @@ static void change_frame_apply(bContext *C, wmOperator *op) { Main *bmain = CTX_data_main(C); Scene *scene = CTX_data_scene(C); - int frame = RNA_int_get(op->ptr, "frame"); + float frame = RNA_float_get(op->ptr, "frame"); bool do_snap = RNA_boolean_get(op->ptr, "snap"); if (do_snap && CTX_wm_space_seq(C)) { @@ -103,10 +103,15 @@ static void change_frame_apply(bContext *C, wmOperator *op) } /* set the new frame number */ - CFRA = frame; + CFRA = (int)frame; + if (scene->r.flag & SCER_SHOW_SUBFRAME) { + SUBFRA = frame - (int)frame; + } + else { + SUBFRA = 0.0f; + } FRAMENUMBER_MIN_CLAMP(CFRA); - SUBFRA = 0.0f; - + /* do updates */ BKE_sound_seek_scene(bmain, scene); WM_event_add_notifier(C, NC_SCENE | ND_FRAME, scene); @@ -125,18 +130,18 @@ static int change_frame_exec(bContext *C, wmOperator *op) /* ---- */ /* Get frame from mouse coordinates */ -static int frame_from_event(bContext *C, const wmEvent *event) +static float frame_from_event(bContext *C, const wmEvent *event) { ARegion *region = CTX_wm_region(C); Scene *scene = CTX_data_scene(C); float viewx; - int frame; + float frame; /* convert from region coordinates to View2D 'tot' space */ viewx = UI_view2d_region_to_view_x(®ion->v2d, event->mval[0]); /* round result to nearest int (frames are ints!) 
*/ - frame = iroundf(viewx); + frame = viewx; if (scene->r.flag & SCER_LOCK_FRAME_SELECTION) { CLAMP(frame, PSFRA, PEFRA); @@ -187,7 +192,7 @@ static int change_frame_invoke(bContext *C, wmOperator *op, const wmEvent *event * as user could click on a single frame (jump to frame) as well as * click-dragging over a range (modal scrubbing). */ - RNA_int_set(op->ptr, "frame", frame_from_event(C, event)); + RNA_float_set(op->ptr, "frame", frame_from_event(C, event)); change_frame_seq_preview_begin(C, event); @@ -215,7 +220,7 @@ static int change_frame_modal(bContext *C, wmOperator *op, const wmEvent *event) break; case MOUSEMOVE: - RNA_int_set(op->ptr, "frame", frame_from_event(C, event)); + RNA_float_set(op->ptr, "frame", frame_from_event(C, event)); change_frame_apply(C, op); break; @@ -268,7 +273,7 @@ static void ANIM_OT_change_frame(wmOperatorType *ot) ot->undo_group = "FRAME_CHANGE"; /* rna */ - ot->prop = RNA_def_int(ot->srna, "frame", 0, MINAFRAME, MAXFRAME, "Frame", "", MINAFRAME, MAXFRAME); + ot->prop = RNA_def_float(ot->srna, "frame", 0, MINAFRAME, MAXFRAME, "Frame", "", MINAFRAME, MAXFRAME); prop = RNA_def_boolean(ot->srna, "snap", false, "Snap", ""); RNA_def_property_flag(prop, PROP_SKIP_SAVE); } diff --git a/source/blender/editors/armature/armature_intern.h b/source/blender/editors/armature/armature_intern.h index b39b4bd81ee..190b0610059 100644 --- a/source/blender/editors/armature/armature_intern.h +++ b/source/blender/editors/armature/armature_intern.h @@ -247,8 +247,10 @@ void armature_select_mirrored_ex(struct bArmature *arm, const int flag); void armature_select_mirrored(struct bArmature *arm); void armature_tag_unselect(struct bArmature *arm); -void *get_nearest_bone(struct bContext *C, short findunsel, int x, int y); -void *get_bone_from_selectbuffer(struct Scene *scene, struct Base *base, unsigned int *buffer, short hits, short findunsel, bool do_nearest); +void *get_nearest_bone(struct bContext *C, const int xy[2], bool findunsel); +void 
*get_bone_from_selectbuffer( + struct Scene *scene, struct Base *base, const unsigned int *buffer, short hits, + bool findunsel, bool do_nearest); int bone_looper(struct Object *ob, struct Bone *bone, void *data, int (*bone_func)(struct Object *, struct Bone *, void *)); diff --git a/source/blender/editors/armature/armature_naming.c b/source/blender/editors/armature/armature_naming.c index fa192ed6f36..c928508237d 100644 --- a/source/blender/editors/armature/armature_naming.c +++ b/source/blender/editors/armature/armature_naming.c @@ -362,7 +362,7 @@ static int armature_flip_names_exec(bContext *C, wmOperator *UNUSED(op)) arm = ob->data; - ListBase bones_names= {NULL}; + ListBase bones_names = {NULL}; CTX_DATA_BEGIN(C, EditBone *, ebone, selected_editable_bones) { diff --git a/source/blender/editors/armature/armature_select.c b/source/blender/editors/armature/armature_select.c index e9946abba0b..d19862cb4b0 100644 --- a/source/blender/editors/armature/armature_select.c +++ b/source/blender/editors/armature/armature_select.c @@ -53,6 +53,8 @@ #include "ED_screen.h" #include "ED_view3d.h" +#include "GPU_select.h" + #include "armature_intern.h" /* utility macros for storing a temp int in the bone (selection flag) */ @@ -74,7 +76,9 @@ Bone *get_indexed_bone(Object *ob, int index) /* See if there are any selected bones in this buffer */ /* only bones from base are checked on */ -void *get_bone_from_selectbuffer(Scene *scene, Base *base, unsigned int *buffer, short hits, short findunsel, bool do_nearest) +void *get_bone_from_selectbuffer( + Scene *scene, Base *base, const unsigned int *buffer, short hits, + bool findunsel, bool do_nearest) { Object *obedit = scene->obedit; // XXX get from context Bone *bone; @@ -103,8 +107,8 @@ void *get_bone_from_selectbuffer(Scene *scene, Base *base, unsigned int *buffer, sel = (bone->flag & BONE_SELECTED); else sel = !(bone->flag & BONE_SELECTED); - - data = bone; + + data = bone; } else { data = NULL; @@ -162,7 +166,7 @@ void 
*get_bone_from_selectbuffer(Scene *scene, Base *base, unsigned int *buffer, /* used by posemode as well editmode */ /* only checks scene->basact! */ /* x and y are mouse coords (area space) */ -void *get_nearest_bone(bContext *C, short findunsel, int x, int y) +void *get_nearest_bone(bContext *C, const int xy[2], bool findunsel) { ViewContext vc; rcti rect; @@ -172,10 +176,10 @@ void *get_nearest_bone(bContext *C, short findunsel, int x, int y) view3d_set_viewcontext(C, &vc); // rect.xmin = ... mouseco! - rect.xmin = rect.xmax = x; - rect.ymin = rect.ymax = y; + rect.xmin = rect.xmax = xy[0]; + rect.ymin = rect.ymax = xy[1]; - hits = view3d_opengl_select(&vc, buffer, MAXPICKBUF, &rect, true); + hits = view3d_opengl_select(&vc, buffer, MAXPICKBUF, &rect, VIEW3D_SELECT_PICK_NEAREST); if (hits > 0) return get_bone_from_selectbuffer(vc.scene, vc.scene->basact, buffer, hits, findunsel, true); @@ -197,10 +201,7 @@ static int armature_select_linked_invoke(bContext *C, wmOperator *op, const wmEv view3d_operator_needs_opengl(C); - if (extend) - bone = get_nearest_bone(C, 0, event->mval[0], event->mval[1]); - else - bone = get_nearest_bone(C, 1, event->mval[0], event->mval[1]); + bone = get_nearest_bone(C, event->mval, !extend); if (!bone) return OPERATOR_CANCELLED; @@ -276,10 +277,24 @@ void ARMATURE_OT_select_linked(wmOperatorType *ot) RNA_def_boolean(ot->srna, "extend", false, "Extend", "Extend selection instead of deselecting everything first"); } +/* utility function for get_nearest_editbonepoint */ +static int selectbuffer_ret_hits_12(unsigned int *UNUSED(buffer), const int hits12) +{ + return hits12; +} + +static int selectbuffer_ret_hits_5(unsigned int *buffer, const int hits12, const int hits5) +{ + const int offs = 4 * hits12; + memcpy(buffer, buffer + offs, 4 * hits5 * sizeof(unsigned int)); + return hits5; +} + /* does bones and points */ /* note that BONE ROOT only gets drawn for root bones (or without IK) */ -static EditBone 
*get_nearest_editbonepoint(ViewContext *vc, const int mval[2], - ListBase *edbo, int findunsel, int *selmask) +static EditBone *get_nearest_editbonepoint( + ViewContext *vc, const int mval[2], + ListBase *edbo, bool findunsel, bool use_cycle, int *r_selmask) { bArmature *arm = (bArmature *)vc->obedit->data; EditBone *ebone_next_act = arm->act_edbone; @@ -289,7 +304,9 @@ static EditBone *get_nearest_editbonepoint(ViewContext *vc, const int mval[2], unsigned int buffer[MAXPICKBUF]; unsigned int hitresult, besthitresult = BONESEL_NOSEL; int i, mindep = 5; - short hits; + int hits12, hits5 = 0; + + static int last_mval[2] = {-100, -100}; /* find the bone after the current active bone, so as to bump up its chances in selection. * this way overlapping bones will cycle selection state as with objects. */ @@ -303,22 +320,59 @@ static EditBone *get_nearest_editbonepoint(ViewContext *vc, const int mval[2], ebone_next_act = NULL; } - rect.xmin = mval[0] - 5; - rect.xmax = mval[0] + 5; - rect.ymin = mval[1] - 5; - rect.ymax = mval[1] + 5; - - hits = view3d_opengl_select(vc, buffer, MAXPICKBUF, &rect, true); - if (hits == 0) { - rect.xmin = mval[0] - 12; - rect.xmax = mval[0] + 12; - rect.ymin = mval[1] - 12; - rect.ymax = mval[1] + 12; - hits = view3d_opengl_select(vc, buffer, MAXPICKBUF, &rect, true); + bool do_nearest = false; + + /* define if we use solid nearest select or not */ + if (use_cycle) { + if (vc->v3d->drawtype > OB_WIRE) { + do_nearest = true; + if (len_manhattan_v2v2_int(mval, last_mval) < 3) { + do_nearest = false; + } + } + copy_v2_v2_int(last_mval, mval); + } + else { + if (vc->v3d->drawtype > OB_WIRE) { + do_nearest = true; + } + } + + /* matching logic from 'mixed_bones_object_selectbuffer' */ + const int select_mode = (do_nearest ? 
VIEW3D_SELECT_PICK_NEAREST : VIEW3D_SELECT_PICK_ALL); + int hits = 0; + + /* we _must_ end cache before return, use 'goto cache_end' */ + GPU_select_cache_begin(); + + BLI_rcti_init_pt_radius(&rect, mval, 12); + hits12 = view3d_opengl_select(vc, buffer, MAXPICKBUF, &rect, select_mode); + if (hits12 == 1) { + hits = selectbuffer_ret_hits_12(buffer, hits12); + goto cache_end; + } + else if (hits12 > 0) { + int offs; + + offs = 4 * hits12; + BLI_rcti_init_pt_radius(&rect, mval, 5); + hits5 = view3d_opengl_select(vc, buffer + offs, MAXPICKBUF - offs, &rect, select_mode); + + if (hits5 == 1) { + hits = selectbuffer_ret_hits_5(buffer, hits12, hits5); + goto cache_end; + } + + if (hits5 > 0) { hits = selectbuffer_ret_hits_5(buffer, hits12, hits5); goto cache_end; } + else { hits = selectbuffer_ret_hits_12(buffer, hits12); goto cache_end; } } + +cache_end: + GPU_select_cache_end(); + /* See if there are any selected bones in this group */ if (hits > 0) { - + if (hits == 1) { if (!(buffer[3] & BONESEL_NOSEL)) besthitresult = buffer[3]; @@ -375,17 +429,17 @@ static EditBone *get_nearest_editbonepoint(ViewContext *vc, const int mval[2], ebone = BLI_findlink(edbo, besthitresult & ~BONESEL_ANY); - *selmask = 0; + *r_selmask = 0; if (besthitresult & BONESEL_ROOT) - *selmask |= BONE_ROOTSEL; + *r_selmask |= BONE_ROOTSEL; if (besthitresult & BONESEL_TIP) - *selmask |= BONE_TIPSEL; + *r_selmask |= BONE_TIPSEL; if (besthitresult & BONESEL_BONE) - *selmask |= BONE_SELECTED; + *r_selmask |= BONE_SELECTED; return ebone; } } - *selmask = 0; + *r_selmask = 0; return NULL; } @@ -439,8 +493,8 @@ bool ED_armature_select_pick(bContext *C, const int mval[2], bool extend, bool d if (BIF_sk_selectStroke(C, mval, extend)) { return true; } - - nearBone = get_nearest_editbonepoint(&vc, mval, arm->edbo, 1, &selmask); + + nearBone = get_nearest_editbonepoint(&vc, mval, arm->edbo, true, true, &selmask); if (nearBone) { if (!extend && !deselect && !toggle) { @@ -1202,7 +1256,7 @@ static int 
armature_shortest_path_pick_invoke(bContext *C, wmOperator *op, const view3d_operator_needs_opengl(C); ebone_src = arm->act_edbone; - ebone_dst = get_nearest_bone(C, 0, event->mval[0], event->mval[1]); + ebone_dst = get_nearest_bone(C, event->mval, false); /* fallback to object selection */ if (ELEM(NULL, ebone_src, ebone_dst) || (ebone_src == ebone_dst)) { diff --git a/source/blender/editors/armature/editarmature_sketch.c b/source/blender/editors/armature/editarmature_sketch.c index f6c04e9570a..bba486bc65c 100644 --- a/source/blender/editors/armature/editarmature_sketch.c +++ b/source/blender/editors/armature/editarmature_sketch.c @@ -1907,12 +1907,9 @@ static bool sk_selectStroke(bContext *C, SK_Sketch *sketch, const int mval[2], c view3d_set_viewcontext(C, &vc); - rect.xmin = mval[0] - 5; - rect.xmax = mval[0] + 5; - rect.ymin = mval[1] - 5; - rect.ymax = mval[1] + 5; + BLI_rcti_init_pt_radius(&rect, mval, 5); - hits = view3d_opengl_select(&vc, buffer, MAXPICKBUF, &rect, true); + hits = view3d_opengl_select(&vc, buffer, MAXPICKBUF, &rect, VIEW3D_SELECT_PICK_NEAREST); if (hits > 0) { int besthitresult = -1; diff --git a/source/blender/editors/armature/pose_select.c b/source/blender/editors/armature/pose_select.c index 44470c1f827..6e328552411 100644 --- a/source/blender/editors/armature/pose_select.c +++ b/source/blender/editors/armature/pose_select.c @@ -132,8 +132,9 @@ void ED_pose_bone_select(Object *ob, bPoseChannel *pchan, bool select) /* called from editview.c, for mode-less pose selection */ /* assumes scene obact and basact is still on old situation */ -int ED_do_pose_selectbuffer(Scene *scene, Base *base, unsigned int *buffer, short hits, - bool extend, bool deselect, bool toggle, bool do_nearest) +bool ED_do_pose_selectbuffer( + Scene *scene, Base *base, const unsigned int *buffer, short hits, + bool extend, bool deselect, bool toggle, bool do_nearest) { Object *ob = base->object; Bone *nearBone; @@ -280,12 +281,9 @@ static int 
pose_select_connected_invoke(bContext *C, wmOperator *op, const wmEve const bool extend = RNA_boolean_get(op->ptr, "extend"); view3d_operator_needs_opengl(C); - - if (extend) - bone = get_nearest_bone(C, 0, event->mval[0], event->mval[1]); - else - bone = get_nearest_bone(C, 1, event->mval[0], event->mval[1]); - + + bone = get_nearest_bone(C, event->mval, !extend); + if (!bone) return OPERATOR_CANCELLED; diff --git a/source/blender/editors/curve/editcurve.c b/source/blender/editors/curve/editcurve.c index e9fd5fb5a43..47f42ab5321 100644 --- a/source/blender/editors/curve/editcurve.c +++ b/source/blender/editors/curve/editcurve.c @@ -91,14 +91,6 @@ typedef struct { int flag; } UndoCurve; -/* Definitions needed for shape keys */ -typedef struct { - void *orig_cv; - int key_index, nu_index, pt_index, vertex_index; - bool switched; - Nurb *orig_nu; -} CVKeyIndex; - void selectend_nurb(Object *obedit, enum eEndPoint_Types selfirst, bool doswap, bool selstatus); static void adduplicateflagNurb(Object *obedit, ListBase *newnurb, const short flag, const bool split); static int curve_delete_segments(Object *obedit, const bool split); @@ -138,9 +130,9 @@ void printknots(Object *obedit) /* ********************* Shape keys *************** */ -static CVKeyIndex *init_cvKeyIndex(void *cv, int key_index, int nu_index, int pt_index, int vertex_index, Nurb *orig_nu) +static CVKeyIndex *init_cvKeyIndex(void *cv, int key_index, int nu_index, int pt_index, int vertex_index) { - CVKeyIndex *cvIndex = MEM_callocN(sizeof(CVKeyIndex), "init_cvKeyIndex"); + CVKeyIndex *cvIndex = MEM_callocN(sizeof(CVKeyIndex), __func__); cvIndex->orig_cv = cv; cvIndex->key_index = key_index; @@ -148,7 +140,6 @@ static CVKeyIndex *init_cvKeyIndex(void *cv, int key_index, int nu_index, int pt cvIndex->pt_index = pt_index; cvIndex->vertex_index = vertex_index; cvIndex->switched = false; - cvIndex->orig_nu = orig_nu; return cvIndex; } @@ -174,7 +165,12 @@ static void init_editNurb_keyIndex(EditNurb *editnurb, 
ListBase *origBase) origbezt = orignu->bezt; pt_index = 0; while (a--) { - keyIndex = init_cvKeyIndex(origbezt, key_index, nu_index, pt_index, vertex_index, orignu); + /* We cannot keep *any* reference to curve obdata, + * it might be replaced and freed while editcurve remain in use (in viewport render case e.g.). + * Note that we could use a pool to avoid lots of malloc's here, but... not really a problem for now. */ + BezTriple *origbezt_cpy = MEM_mallocN(sizeof(*origbezt), __func__); + *origbezt_cpy = *origbezt; + keyIndex = init_cvKeyIndex(origbezt_cpy, key_index, nu_index, pt_index, vertex_index); BLI_ghash_insert(gh, bezt, keyIndex); key_index += 12; vertex_index += 3; @@ -189,7 +185,12 @@ static void init_editNurb_keyIndex(EditNurb *editnurb, ListBase *origBase) origbp = orignu->bp; pt_index = 0; while (a--) { - keyIndex = init_cvKeyIndex(origbp, key_index, nu_index, pt_index, vertex_index, orignu); + /* We cannot keep *any* reference to curve obdata, + * it might be replaced and freed while editcurve remain in use (in viewport render case e.g.). + * Note that we could use a pool to avoid lots of malloc's here, but... not really a problem for now. 
*/ + BPoint *origbp_cpy = MEM_mallocN(sizeof(*origbp_cpy), __func__); + *origbp_cpy = *origbp; + keyIndex = init_cvKeyIndex(origbp_cpy, key_index, nu_index, pt_index, vertex_index); BLI_ghash_insert(gh, bp, keyIndex); key_index += 4; bp++; @@ -250,23 +251,22 @@ static int getKeyIndexOrig_keyIndex(EditNurb *editnurb, void *cv) return index->key_index; } -static void keyIndex_delCV(EditNurb *editnurb, const void *cv) +static void keyIndex_delBezt(EditNurb *editnurb, BezTriple *bezt) { if (!editnurb->keyindex) { return; } - BLI_ghash_remove(editnurb->keyindex, cv, NULL, MEM_freeN); -} - -static void keyIndex_delBezt(EditNurb *editnurb, BezTriple *bezt) -{ - keyIndex_delCV(editnurb, bezt); + BKE_curve_editNurb_keyIndex_delCV(editnurb->keyindex, bezt); } static void keyIndex_delBP(EditNurb *editnurb, BPoint *bp) { - keyIndex_delCV(editnurb, bp); + if (!editnurb->keyindex) { + return; + } + + BKE_curve_editNurb_keyIndex_delCV(editnurb->keyindex, bp); } static void keyIndex_delNurb(EditNurb *editnurb, Nurb *nu) @@ -282,7 +282,7 @@ static void keyIndex_delNurb(EditNurb *editnurb, Nurb *nu) a = nu->pntsu; while (a--) { - BLI_ghash_remove(editnurb->keyindex, bezt, NULL, MEM_freeN); + BKE_curve_editNurb_keyIndex_delCV(editnurb->keyindex, bezt); bezt++; } } @@ -291,7 +291,7 @@ static void keyIndex_delNurb(EditNurb *editnurb, Nurb *nu) a = nu->pntsu * nu->pntsv; while (a--) { - BLI_ghash_remove(editnurb->keyindex, bp, NULL, MEM_freeN); + BKE_curve_editNurb_keyIndex_delCV(editnurb->keyindex, bp); bp++; } } @@ -535,6 +535,7 @@ static GHash *dupli_keyIndexHash(GHash *keyindex) CVKeyIndex *newIndex = MEM_mallocN(sizeof(CVKeyIndex), "dupli_keyIndexHash index"); memcpy(newIndex, index, sizeof(CVKeyIndex)); + newIndex->orig_cv = MEM_dupallocN(index->orig_cv); BLI_ghash_insert(gh, cv, newIndex); } @@ -624,7 +625,7 @@ static void calc_keyHandles(ListBase *nurb, float *key) } } -static void calc_shapeKeys(Object *obedit) +static void calc_shapeKeys(Object *obedit, ListBase *newnurbs) { 
Curve *cu = (Curve *)obedit->data; @@ -636,7 +637,7 @@ static void calc_shapeKeys(Object *obedit) KeyBlock *actkey = BLI_findlink(&cu->key->block, editnurb->shapenr - 1); BezTriple *bezt, *oldbezt; BPoint *bp, *oldbp; - Nurb *nu; + Nurb *nu, *newnu; int totvert = BKE_nurbList_verts_count(&editnurb->nurbs); float (*ofs)[3] = NULL; @@ -706,20 +707,25 @@ static void calc_shapeKeys(Object *obedit) currkey = cu->key->block.first; while (currkey) { - int apply_offset = (ofs && (currkey != actkey) && (editnurb->shapenr - 1 == currkey->relative)); + const bool apply_offset = (ofs && (currkey != actkey) && (editnurb->shapenr - 1 == currkey->relative)); float *fp = newkey = MEM_callocN(cu->key->elemsize * totvert, "currkey->data"); ofp = oldkey = currkey->data; nu = editnurb->nurbs.first; + /* We need to restore to original curve into newnurb, *not* editcurve's nurbs. + * Otherwise, in case we update obdata *without* leaving editmode (e.g. viewport render), we would + * invalidate editcurve. */ + newnu = newnurbs->first; i = 0; while (nu) { if (currkey == actkey) { - int restore = actkey != cu->key->refkey; + const bool restore = actkey != cu->key->refkey; if (nu->bezt) { bezt = nu->bezt; a = nu->pntsu; + BezTriple *newbezt = newnu->bezt; while (a--) { int j; oldbezt = getKeyIndexOrig_bezt(editnurb, bezt); @@ -728,7 +734,7 @@ static void calc_shapeKeys(Object *obedit) copy_v3_v3(fp, bezt->vec[j]); if (restore && oldbezt) { - copy_v3_v3(bezt->vec[j], oldbezt->vec[j]); + copy_v3_v3(newbezt->vec[j], oldbezt->vec[j]); } fp += 3; @@ -736,16 +742,18 @@ static void calc_shapeKeys(Object *obedit) fp[0] = bezt->alfa; if (restore && oldbezt) { - bezt->alfa = oldbezt->alfa; + newbezt->alfa = oldbezt->alfa; } fp += 3; ++i; /* alphas */ bezt++; + newbezt++; } } else { bp = nu->bp; a = nu->pntsu * nu->pntsv; + BPoint *newbp = newnu->bp; while (a--) { oldbp = getKeyIndexOrig_bp(editnurb, bp); @@ -754,12 +762,13 @@ static void calc_shapeKeys(Object *obedit) fp[3] = bp->alfa; if (restore && 
oldbp) { - copy_v3_v3(bp->vec, oldbp->vec); - bp->alfa = oldbp->alfa; + copy_v3_v3(newbp->vec, oldbp->vec); + newbp->alfa = oldbp->alfa; } fp += 4; bp++; + newbp++; i += 2; } } @@ -1204,9 +1213,13 @@ void ED_curve_editnurb_load(Object *obedit) } } + /* We have to pass also new copied nurbs, since we want to restore original curve (without edited shapekey) + * on obdata, but *not* on editcurve itself (ED_curve_editnurb_load call does not always implies freeing + * of editcurve, e.g. when called to generate render data...). */ + calc_shapeKeys(obedit, &newnurb); + cu->nurb = newnurb; - calc_shapeKeys(obedit); ED_curve_updateAnimPaths(obedit->data); BKE_nurbList_free(&oldnurb); @@ -1227,13 +1240,11 @@ void ED_curve_editnurb_make(Object *obedit) if (actkey) { // XXX strcpy(G.editModeTitleExtra, "(Key) "); undo_editmode_clear(); - BKE_keyblock_convert_to_curve(actkey, cu, &cu->nurb); } if (editnurb) { BKE_nurbList_free(&editnurb->nurbs); - BKE_curve_editNurb_keyIndex_free(editnurb); - editnurb->keyindex = NULL; + BKE_curve_editNurb_keyIndex_free(&editnurb->keyindex); } else { editnurb = MEM_callocN(sizeof(EditNurb), "editnurb"); @@ -1248,12 +1259,16 @@ void ED_curve_editnurb_make(Object *obedit) nu = nu->next; } - if (actkey) - editnurb->shapenr = obedit->shapenr; - /* animation could be added in editmode even if there was no animdata in * object mode hence we always need CVs index be created */ init_editNurb_keyIndex(editnurb, &cu->nurb); + + if (actkey) { + editnurb->shapenr = obedit->shapenr; + /* Apply shapekey to new nurbs of editnurb, not those of original curve (and *after* we generated keyIndex), + * else we do not have valid 'original' data to properly restore curve when leaving editmode. 
*/ + BKE_keyblock_convert_to_curve(actkey, cu, &editnurb->nurbs); + } } } @@ -1309,8 +1324,7 @@ static int separate_exec(bContext *C, wmOperator *op) ED_curve_editnurb_make(newob); newedit = newcu->editnurb; BKE_nurbList_free(&newedit->nurbs); - BKE_curve_editNurb_keyIndex_free(newedit); - newedit->keyindex = NULL; + BKE_curve_editNurb_keyIndex_free(&newedit->keyindex); BLI_movelisttolist(&newedit->nurbs, &newnurb); /* 4. put old object out of editmode and delete separated geometry */ @@ -6110,7 +6124,7 @@ static void undoCurve_to_editCurve(void *ucu, void *UNUSED(edata), void *cu_v) BKE_nurbList_free(editbase); if (undoCurve->undoIndex) { - BLI_ghash_free(editnurb->keyindex, NULL, MEM_freeN); + BKE_curve_editNurb_keyIndex_free(&editnurb->keyindex); editnurb->keyindex = dupli_keyIndexHash(undoCurve->undoIndex); } @@ -6188,8 +6202,7 @@ static void free_undoCurve(void *ucv) BKE_nurbList_free(&undoCurve->nubase); - if (undoCurve->undoIndex) - BLI_ghash_free(undoCurve->undoIndex, NULL, MEM_freeN); + BKE_curve_editNurb_keyIndex_free(&undoCurve->undoIndex); free_fcurves(&undoCurve->fcurves); free_fcurves(&undoCurve->drivers); diff --git a/source/blender/editors/gpencil/gpencil_edit.c b/source/blender/editors/gpencil/gpencil_edit.c index e118e490f25..fa9acc36a2b 100644 --- a/source/blender/editors/gpencil/gpencil_edit.c +++ b/source/blender/editors/gpencil/gpencil_edit.c @@ -74,7 +74,6 @@ #include "ED_object.h" #include "ED_screen.h" #include "ED_view3d.h" -#include "ED_screen.h" #include "ED_space_api.h" #include "gpencil_intern.h" diff --git a/source/blender/editors/include/ED_armature.h b/source/blender/editors/include/ED_armature.h index 7ad61671b1b..6b8943421bd 100644 --- a/source/blender/editors/include/ED_armature.h +++ b/source/blender/editors/include/ED_armature.h @@ -130,8 +130,9 @@ void ED_armature_ebone_listbase_temp_clear(struct ListBase *lb); void ED_armature_deselect_all(struct Object *obedit); void ED_armature_deselect_all_visible(struct Object *obedit); 
-int ED_do_pose_selectbuffer(struct Scene *scene, struct Base *base, unsigned int *buffer, - short hits, bool extend, bool deselect, bool toggle, bool do_nearest); +bool ED_do_pose_selectbuffer( + struct Scene *scene, struct Base *base, const unsigned int *buffer, short hits, + bool extend, bool deselect, bool toggle, bool do_nearest); bool ED_armature_select_pick(struct bContext *C, const int mval[2], bool extend, bool deselect, bool toggle); int join_armature_exec(struct bContext *C, struct wmOperator *op); struct Bone *get_indexed_bone(struct Object *ob, int index); diff --git a/source/blender/editors/include/ED_view3d.h b/source/blender/editors/include/ED_view3d.h index 79176d9e9cf..af6f37d937c 100644 --- a/source/blender/editors/include/ED_view3d.h +++ b/source/blender/editors/include/ED_view3d.h @@ -47,6 +47,7 @@ struct Main; struct MetaElem; struct Nurb; struct Object; +struct RV3DMatrixStore; struct RegionView3D; struct Scene; struct ScrArea; @@ -301,7 +302,19 @@ bool ED_view3d_autodist_depth_seg(struct ARegion *ar, const int mval_sta[2], con /* select */ #define MAXPICKELEMS 2500 #define MAXPICKBUF (4 * MAXPICKELEMS) -short view3d_opengl_select(struct ViewContext *vc, unsigned int *buffer, unsigned int bufsize, const struct rcti *input, bool do_nearest); + +enum { + /* all elements in the region, ignore depth */ + VIEW3D_SELECT_ALL = 0, + /* pick also depth sorts (only for small regions!) */ + VIEW3D_SELECT_PICK_ALL = 1, + /* sorts and only returns visible objects (only for small regions!) 
*/ + VIEW3D_SELECT_PICK_NEAREST = 2, +}; + +int view3d_opengl_select( + struct ViewContext *vc, unsigned int *buffer, unsigned int bufsize, const struct rcti *input, + int select_mode); /* view3d_select.c */ float ED_view3d_select_dist_px(void); @@ -330,8 +343,8 @@ void ED_view3d_check_mats_rv3d(struct RegionView3D *rv3d); #endif int ED_view3d_scene_layer_set(int lay, const int *values, int *active); -void *ED_view3d_mats_rv3d_backup(struct RegionView3D *rv3d); -void ED_view3d_mats_rv3d_restore(struct RegionView3D *rv3d, void *rv3dmat_pt); +struct RV3DMatrixStore *ED_view3d_mats_rv3d_backup(struct RegionView3D *rv3d); +void ED_view3d_mats_rv3d_restore(struct RegionView3D *rv3d, struct RV3DMatrixStore *rv3dmat); bool ED_view3d_context_activate(struct bContext *C); void ED_view3d_draw_offscreen_init(struct Scene *scene, struct View3D *v3d); diff --git a/source/blender/editors/interface/interface_handlers.c b/source/blender/editors/interface/interface_handlers.c index 734cd02a056..6e3c3c3674a 100644 --- a/source/blender/editors/interface/interface_handlers.c +++ b/source/blender/editors/interface/interface_handlers.c @@ -7741,7 +7741,8 @@ static void button_activate_state(bContext *C, uiBut *but, uiHandleButtonState s if (ui_but_is_cursor_warp(but)) { #ifdef USE_CONT_MOUSE_CORRECT - if (data->ungrab_mval[0] != FLT_MAX) { + /* stereo3d has issues with changing cursor location so rather avoid */ + if (data->ungrab_mval[0] != FLT_MAX && !WM_stereo3d_enabled(data->window, false)) { int mouse_ungrab_xy[2]; ui_block_to_window_fl(data->region, but->block, &data->ungrab_mval[0], &data->ungrab_mval[1]); mouse_ungrab_xy[0] = data->ungrab_mval[0]; diff --git a/source/blender/editors/interface/interface_layout.c b/source/blender/editors/interface/interface_layout.c index ca2538022b0..ce1153911da 100644 --- a/source/blender/editors/interface/interface_layout.c +++ b/source/blender/editors/interface/interface_layout.c @@ -189,7 +189,7 @@ static const char 
*ui_item_name_add_colon(const char *name, char namestr[UI_MAX_ return name; } -static int ui_item_fit(int item, int pos, int all, int available, bool is_last, int alignment) +static int ui_item_fit(int item, int pos, int all, int available, bool is_last, int alignment, float *extra_pixel) { /* available == 0 is unlimited */ if (available == 0) @@ -199,16 +199,22 @@ static int ui_item_fit(int item, int pos, int all, int available, bool is_last, /* contents is bigger than available space */ if (is_last) return available - pos; - else - return (item * available) / all; + else { + float width = *extra_pixel + (item * available) / (float)all; + *extra_pixel = width - (int)width; + return (int)width; + } } else { /* contents is smaller or equal to available space */ if (alignment == UI_LAYOUT_ALIGN_EXPAND) { if (is_last) return available - pos; - else - return (item * available) / all; + else { + float width = *extra_pixel + (item * available) / (float)all; + *extra_pixel = width - (int)width; + return (int)width; + } } else return item; @@ -302,6 +308,26 @@ static void ui_item_position(uiItem *item, int x, int y, int w, int h) } } +static void ui_item_move(uiItem *item, int delta_xmin, int delta_xmax) +{ + if (item->type == ITEM_BUTTON) { + uiButtonItem *bitem = (uiButtonItem *)item; + + bitem->but->rect.xmin += delta_xmin; + bitem->but->rect.xmax += delta_xmax; + + ui_but_update(bitem->but); /* for strlen */ + } + else { + uiLayout *litem = (uiLayout *)item; + + if (delta_xmin > 0) + litem->x += delta_xmin; + else + litem->w += delta_xmax; + } +} + /******************** Special RNA Items *********************/ static int ui_layout_local_dir(uiLayout *layout) @@ -1248,7 +1274,7 @@ static void ui_item_rna_size( if (!w) { if (type == PROP_ENUM && icon_only) { w = ui_text_icon_width(layout, "", ICON_BLANK1, 0); - w += 0.6f * UI_UNIT_X; + w += 0.5f * UI_UNIT_X; } else { w = ui_text_icon_width(layout, name, icon, 0); @@ -2099,9 +2125,10 @@ static int ui_litem_min_width(int 
itemw) static void ui_litem_layout_row(uiLayout *litem) { - uiItem *item; + uiItem *item, *last_free_item = NULL; int x, y, w, tot, totw, neww, newtotw, itemw, minw, itemh, offset; int fixedw, freew, fixedx, freex, flag = 0, lastw = 0; + float extra_pixel; /* x = litem->x; */ /* UNUSED */ y = litem->y; @@ -2128,6 +2155,7 @@ static void ui_litem_layout_row(uiLayout *litem) x = 0; flag = 0; newtotw = totw; + extra_pixel = 0.0f; for (item = litem->items.first; item; item = item->next) { if (item->flag & UI_ITEM_FIXED) @@ -2137,7 +2165,7 @@ static void ui_litem_layout_row(uiLayout *litem) minw = ui_litem_min_width(itemw); if (w - lastw > 0) - neww = ui_item_fit(itemw, x, totw, w - lastw, !item->next, litem->alignment); + neww = ui_item_fit(itemw, x, totw, w - lastw, !item->next, litem->alignment, &extra_pixel); else neww = 0; /* no space left, all will need clamping to minimum size */ @@ -2166,6 +2194,7 @@ static void ui_litem_layout_row(uiLayout *litem) freex = 0; fixedx = 0; + extra_pixel = 0.0f; x = litem->x; for (item = litem->items.first; item; item = item->next) { @@ -2177,13 +2206,14 @@ static void ui_litem_layout_row(uiLayout *litem) if (item->type != ITEM_BUTTON && item->flag & UI_ITEM_MIN) { minw = itemw; } - itemw = ui_item_fit(minw, fixedx, fixedw, min_ii(w, fixedw), !item->next, litem->alignment); + itemw = ui_item_fit(minw, fixedx, fixedw, min_ii(w, fixedw), !item->next, litem->alignment, &extra_pixel); fixedx += itemw; } else { /* free size item */ - itemw = ui_item_fit(itemw, freex, freew, w - fixedw, !item->next, litem->alignment); + itemw = ui_item_fit(itemw, freex, freew, w - fixedw, !item->next, litem->alignment, &extra_pixel); freex += itemw; + last_free_item = item; } /* align right/center */ @@ -2205,6 +2235,16 @@ static void ui_litem_layout_row(uiLayout *litem) x += litem->space; } + /* add extra pixel */ + uiItem *last_item = litem->items.last; + extra_pixel = litem->w - (x - litem->x); + if (extra_pixel > 0 && litem->alignment == 
UI_LAYOUT_ALIGN_EXPAND && + last_free_item && last_item && last_item->flag & UI_ITEM_FIXED) { + ui_item_move(last_free_item, 0, extra_pixel); + for (item = last_free_item->next; item; item = item->next) + ui_item_move(item, extra_pixel, extra_pixel); + } + litem->w = x - litem->x; litem->h = litem->y - y; litem->x = x; @@ -2216,7 +2256,6 @@ static void ui_litem_estimate_column(uiLayout *litem) { uiItem *item; int itemw, itemh; - bool min_size_flag = true; litem->w = 0; litem->h = 0; @@ -2224,18 +2263,12 @@ static void ui_litem_estimate_column(uiLayout *litem) for (item = litem->items.first; item; item = item->next) { ui_item_size(item, &itemw, &itemh); - min_size_flag = min_size_flag && (item->flag & UI_ITEM_MIN); - litem->w = MAX2(litem->w, itemw); litem->h += itemh; if (item->next) litem->h += litem->space; } - - if (min_size_flag) { - litem->item.flag |= UI_ITEM_MIN; - } } static void ui_litem_layout_column(uiLayout *litem) @@ -2648,13 +2681,14 @@ static void ui_litem_layout_absolute(uiLayout *litem) static void ui_litem_estimate_split(uiLayout *litem) { ui_litem_estimate_row(litem); + litem->item.flag &= ~UI_ITEM_MIN; } static void ui_litem_layout_split(uiLayout *litem) { uiLayoutItemSplit *split = (uiLayoutItemSplit *)litem; uiItem *item; - float percentage; + float percentage, extra_pixel = 0.0f; const int tot = BLI_listbase_count(&litem->items); int itemh, x, y, w, colw = 0; @@ -2677,7 +2711,9 @@ static void ui_litem_layout_split(uiLayout *litem) x += colw; if (item->next) { - colw = (w - (int)(w * percentage)) / (tot - 1); + const float width = extra_pixel + (w - (int)(w * percentage)) / ((float)tot - 1); + extra_pixel = width - (int)width; + colw = (int)width; colw = MAX2(colw, 0); x += litem->space; @@ -3134,8 +3170,6 @@ static void ui_item_align(uiLayout *litem, short nr) else if (item->type == ITEM_LAYOUT_BOX) { box = (uiLayoutItemBx *)item; box->roundbox->alignnr = nr; - BLI_remlink(&litem->root->block->buttons, box->roundbox); - 
BLI_addhead(&litem->root->block->buttons, box->roundbox); } else if (((uiLayout *)item)->align) { ui_item_align((uiLayout *)item, nr); diff --git a/source/blender/editors/interface/interface_templates.c b/source/blender/editors/interface/interface_templates.c index 4db1c845c23..62f12cd7967 100644 --- a/source/blender/editors/interface/interface_templates.c +++ b/source/blender/editors/interface/interface_templates.c @@ -430,7 +430,7 @@ static void template_ID( uiLayoutRow(layout, true); } else if (flag & UI_ID_BROWSE) { - but = uiDefBlockButN(block, id_search_menu, MEM_dupallocN(template), "", 0, 0, UI_UNIT_X * 1.6, UI_UNIT_Y, + but = uiDefBlockButN(block, id_search_menu, MEM_dupallocN(template), "", 0, 0, UI_UNIT_X * 1.5, UI_UNIT_Y, TIP_(template_id_browse_tip(type))); ui_def_but_icon(but, RNA_struct_ui_icon(type), UI_HAS_ICON); /* default dragging of icon for id browse buttons */ @@ -1978,6 +1978,7 @@ static void curvemap_tools_dofunc(bContext *C, void *cumap_v, int event) case UICURVE_FUNC_HANDLE_AUTO_ANIM: /* set auto-clamped */ curvemap_handle_set(cuma, HD_AUTO_ANIM); curvemapping_changed(cumap, false); + break; case UICURVE_FUNC_EXTEND_HOZ: /* extend horiz */ cuma->flag &= ~CUMA_EXTEND_EXTRAPOLATE; curvemapping_changed(cumap, false); diff --git a/source/blender/editors/interface/interface_widgets.c b/source/blender/editors/interface/interface_widgets.c index b3736a71e74..6e871b8ec92 100644 --- a/source/blender/editors/interface/interface_widgets.c +++ b/source/blender/editors/interface/interface_widgets.c @@ -873,15 +873,15 @@ static void widget_draw_icon( if (icon && icon != ICON_BLANK1) { float ofs = 1.0f / aspect; - if (but->drawflag & UI_BUT_ICON_LEFT) { + if (but->drawflag & UI_BUT_ICON_LEFT || ui_block_is_pie_menu(but->block)) { if (but->block->flag & UI_BLOCK_LOOP) { if (but->type == UI_BTYPE_SEARCH_MENU) xs = rect->xmin + 4.0f * ofs; else - xs = rect->xmin + ofs; + xs = rect->xmin + 2.0f * ofs; } else { - xs = rect->xmin + 4.0f * ofs; + xs = 
rect->xmin + 2.0f * ofs; } ys = (rect->ymin + rect->ymax - height) / 2.0f; } @@ -1554,11 +1554,11 @@ static void widget_draw_text_icon(uiFontStyle *fstyle, uiWidgetColors *wcol, uiB /* Icons on the left with optional text label on the right */ else if (but->flag & UI_HAS_ICON || show_menu_icon) { const BIFIconID icon = (but->flag & UI_HAS_ICON) ? but->icon + but->iconadd : ICON_NONE; - const float icon_size = ICON_SIZE_FROM_BUTRECT(rect); + const float icon_size = ICON_DEFAULT_WIDTH; /* menu item - add some more padding so menus don't feel cramped. it must * be part of the button so that this area is still clickable */ - if (ui_block_is_menu(but->block)) + if (ui_block_is_menu(but->block) && !ui_block_is_pie_menu(but->block)) rect->xmin += 0.3f * U.widget_unit; widget_draw_icon(but, icon, alpha, rect, show_menu_icon); diff --git a/source/blender/editors/mesh/editmesh_intersect.c b/source/blender/editors/mesh/editmesh_intersect.c index de93211bec4..bc9088401db 100644 --- a/source/blender/editors/mesh/editmesh_intersect.c +++ b/source/blender/editors/mesh/editmesh_intersect.c @@ -137,6 +137,12 @@ enum { ISECT_SEL_UNSEL = 1, }; +enum { + ISECT_SEPARATE_ALL = 0, + ISECT_SEPARATE_CUT = 1, + ISECT_SEPARATE_NONE = 2, +}; + static int edbm_intersect_exec(bContext *C, wmOperator *op) { Object *obedit = CTX_data_edit_object(C); @@ -144,7 +150,9 @@ static int edbm_intersect_exec(bContext *C, wmOperator *op) BMesh *bm = em->bm; const int mode = RNA_enum_get(op->ptr, "mode"); int (*test_fn)(BMFace *, void *); - bool use_separate = RNA_boolean_get(op->ptr, "use_separate"); + bool use_separate_all = false; + bool use_separate_cut = false; + const int separate_mode = RNA_enum_get(op->ptr, "separate_mode"); const float eps = RNA_float_get(op->ptr, "threshold"); bool use_self; bool has_isect; @@ -160,15 +168,42 @@ static int edbm_intersect_exec(bContext *C, wmOperator *op) break; } + switch (separate_mode) { + case ISECT_SEPARATE_ALL: + use_separate_all = true; + break; + case 
ISECT_SEPARATE_CUT: + if (use_self == false) { + use_separate_cut = true; + } + else { + /* we could support this but would require more advanced logic inside 'BM_mesh_intersect' + * for now just separate all */ + use_separate_all = true; + } + break; + default: /* ISECT_SEPARATE_NONE */ + break; + } has_isect = BM_mesh_intersect( bm, em->looptris, em->tottri, test_fn, NULL, - use_self, use_separate, true, true, + use_self, use_separate_all, true, true, true, -1, eps); + if (use_separate_cut) { + /* detach selected/un-selected faces */ + BMOperator bmop; + EDBM_op_init(em, &bmop, op, "split geom=%hf use_only_faces=%b", BM_ELEM_SELECT, true); + BMO_op_exec(em->bm, &bmop); + if (!EDBM_op_finish(em, &bmop, op, true)) { + /* should never happen! */ + BKE_report(op->reports, RPT_ERROR, "Error separating"); + } + } if (has_isect) { edbm_intersect_select(em); @@ -190,6 +225,16 @@ void MESH_OT_intersect(struct wmOperatorType *ot) {0, NULL, 0, NULL, NULL} }; + static EnumPropertyItem isect_separate_items[] = { + {ISECT_SEPARATE_ALL, "ALL", 0, "All", + "Separate all geometry from intersections"}, + {ISECT_SEPARATE_CUT, "CUT", 0, "Cut", + "Cut into geometry keeping each side separate (Selected/Unselected only)"}, + {ISECT_SEPARATE_NONE, "NONE", 0, "Merge", + "Merge all geometry from the intersection"}, + {0, NULL, 0, NULL, NULL} + }; + /* identifiers */ ot->name = "Intersect (Knife)"; ot->description = "Cut an intersection into faces"; @@ -201,7 +246,7 @@ void MESH_OT_intersect(struct wmOperatorType *ot) /* props */ RNA_def_enum(ot->srna, "mode", isect_mode_items, ISECT_SEL_UNSEL, "Source", ""); - RNA_def_boolean(ot->srna, "use_separate", true, "Separate", ""); + RNA_def_enum(ot->srna, "separate_mode", isect_separate_items, ISECT_SEPARATE_CUT, "Separate Mode", ""); RNA_def_float_distance(ot->srna, "threshold", 0.000001f, 0.0, 0.01, "Merge threshold", "", 0.0, 0.001); /* flags */ @@ -239,7 +284,7 @@ static int edbm_intersect_boolean_exec(bContext *C, wmOperator *op) bm, 
em->looptris, em->tottri, test_fn, NULL, - false, false, true, true, + false, false, true, true, true, boolean_operation, eps); diff --git a/source/blender/editors/metaball/mball_edit.c b/source/blender/editors/metaball/mball_edit.c index ed5bf4a92b4..bc42717b69f 100644 --- a/source/blender/editors/metaball/mball_edit.c +++ b/source/blender/editors/metaball/mball_edit.c @@ -592,12 +592,9 @@ bool ED_mball_select_pick(bContext *C, const int mval[2], bool extend, bool dese view3d_set_viewcontext(C, &vc); - rect.xmin = mval[0] - 12; - rect.xmax = mval[0] + 12; - rect.ymin = mval[1] - 12; - rect.ymax = mval[1] + 12; + BLI_rcti_init_pt_radius(&rect, mval, 12); - hits = view3d_opengl_select(&vc, buffer, MAXPICKBUF, &rect, true); + hits = view3d_opengl_select(&vc, buffer, MAXPICKBUF, &rect, VIEW3D_SELECT_PICK_NEAREST); /* does startelem exist? */ ml = mb->editelems->first; diff --git a/source/blender/editors/object/object_add.c b/source/blender/editors/object/object_add.c index 02b2d8492b4..ae458c722f9 100644 --- a/source/blender/editors/object/object_add.c +++ b/source/blender/editors/object/object_add.c @@ -64,6 +64,7 @@ #include "BKE_armature.h" #include "BKE_camera.h" #include "BKE_context.h" +#include "BKE_constraint.h" #include "BKE_curve.h" #include "BKE_depsgraph.h" #include "BKE_DerivedMesh.h" @@ -1377,7 +1378,7 @@ static void make_object_duplilist_real(bContext *C, Scene *scene, Base *base, ob->proxy = NULL; ob->parent = NULL; - BLI_listbase_clear(&ob->constraints); + BKE_constraints_free(&ob->constraints); ob->curve_cache = NULL; ob->transflag &= ~OB_DUPLI; ob->lay = base->lay; diff --git a/source/blender/editors/object/object_bake_api.c b/source/blender/editors/object/object_bake_api.c index fd95d6129ad..968081818a2 100644 --- a/source/blender/editors/object/object_bake_api.c +++ b/source/blender/editors/object/object_bake_api.c @@ -352,12 +352,17 @@ static bool is_noncolor_pass(ScenePassType pass_type) } /* if all is good tag image and return true */ -static 
bool bake_object_check(Object *ob, ReportList *reports) +static bool bake_object_check(Scene *scene, Object *ob, ReportList *reports) { Image *image; void *lock; int i; + if ((ob->lay & scene->lay) == 0) { + BKE_reportf(reports, RPT_ERROR, "Object \"%s\" is not on a scene layer", ob->id.name + 2); + return false; + } + if (ob->type != OB_MESH) { BKE_reportf(reports, RPT_ERROR, "Object \"%s\" is not a mesh", ob->id.name + 2); return false; @@ -491,7 +496,7 @@ static bool bake_pass_filter_check(ScenePassType pass_type, const int pass_filte } /* before even getting in the bake function we check for some basic errors */ -static bool bake_objects_check(Main *bmain, Object *ob, ListBase *selected_objects, +static bool bake_objects_check(Main *bmain, Scene *scene, Object *ob, ListBase *selected_objects, ReportList *reports, const bool is_selected_to_active) { CollectionPointerLink *link; @@ -502,7 +507,7 @@ static bool bake_objects_check(Main *bmain, Object *ob, ListBase *selected_objec if (is_selected_to_active) { int tot_objects = 0; - if (!bake_object_check(ob, reports)) + if (!bake_object_check(scene, ob, reports)) return false; for (link = selected_objects->first; link; link = link->next) { @@ -530,7 +535,7 @@ static bool bake_objects_check(Main *bmain, Object *ob, ListBase *selected_objec } for (link = selected_objects->first; link; link = link->next) { - if (!bake_object_check(link->ptr.data, reports)) + if (!bake_object_check(scene, link->ptr.data, reports)) return false; } } @@ -619,7 +624,7 @@ static Mesh *bake_mesh_new_from_object(Main *bmain, Scene *scene, Object *ob) ED_object_editmode_load(ob); Mesh *me = BKE_mesh_new_from_object(bmain, scene, ob, 1, 2, 0, 0); - BKE_mesh_split_faces(me); + BKE_mesh_split_faces(me, true); return me; } @@ -1179,7 +1184,7 @@ static int bake_exec(bContext *C, wmOperator *op) goto finally; } - if (!bake_objects_check(bkr.main, bkr.ob, &bkr.selected_objects, bkr.reports, bkr.is_selected_to_active)) { + if 
(!bake_objects_check(bkr.main, bkr.scene, bkr.ob, &bkr.selected_objects, bkr.reports, bkr.is_selected_to_active)) { goto finally; } @@ -1237,7 +1242,7 @@ static void bake_startjob(void *bkv, short *UNUSED(stop), short *do_update, floa return; } - if (!bake_objects_check(bkr->main, bkr->ob, &bkr->selected_objects, bkr->reports, bkr->is_selected_to_active)) { + if (!bake_objects_check(bkr->main, bkr->scene, bkr->ob, &bkr->selected_objects, bkr->reports, bkr->is_selected_to_active)) { bkr->result = OPERATOR_CANCELLED; return; } diff --git a/source/blender/editors/object/object_intern.h b/source/blender/editors/object/object_intern.h index 9710e4f843d..b8957bdedf9 100644 --- a/source/blender/editors/object/object_intern.h +++ b/source/blender/editors/object/object_intern.h @@ -186,6 +186,7 @@ void OBJECT_OT_skin_loose_mark_clear(struct wmOperatorType *ot); void OBJECT_OT_skin_radii_equalize(struct wmOperatorType *ot); void OBJECT_OT_skin_armature_create(struct wmOperatorType *ot); void OBJECT_OT_laplaciandeform_bind(struct wmOperatorType *ot); +void OBJECT_OT_surfacedeform_bind(struct wmOperatorType *ot); /* object_constraint.c */ void OBJECT_OT_constraint_add(struct wmOperatorType *ot); diff --git a/source/blender/editors/object/object_modifier.c b/source/blender/editors/object/object_modifier.c index 06f495fb9f1..d601f5c3b14 100644 --- a/source/blender/editors/object/object_modifier.c +++ b/source/blender/editors/object/object_modifier.c @@ -2294,3 +2294,56 @@ void OBJECT_OT_laplaciandeform_bind(wmOperatorType *ot) ot->flag = OPTYPE_REGISTER | OPTYPE_UNDO | OPTYPE_INTERNAL; edit_modifier_properties(ot); } + +/************************ sdef bind operator *********************/ + +static int surfacedeform_bind_poll(bContext *C) +{ + return edit_modifier_poll_generic(C, &RNA_SurfaceDeformModifier, 0); +} + +static int surfacedeform_bind_exec(bContext *C, wmOperator *op) +{ + Object *ob = ED_object_active_context(C); + SurfaceDeformModifierData *smd = 
(SurfaceDeformModifierData *)edit_modifier_property_get(op, ob, eModifierType_SurfaceDeform); + + if (!smd) + return OPERATOR_CANCELLED; + + if (smd->flags & MOD_SDEF_BIND) { + smd->flags &= ~MOD_SDEF_BIND; + } + else if (smd->target) { + smd->flags |= MOD_SDEF_BIND; + } + + DAG_id_tag_update(&ob->id, OB_RECALC_DATA); + WM_event_add_notifier(C, NC_OBJECT | ND_MODIFIER, ob); + + return OPERATOR_FINISHED; +} + +static int surfacedeform_bind_invoke(bContext *C, wmOperator *op, const wmEvent *UNUSED(event)) +{ + if (edit_modifier_invoke_properties(C, op)) + return surfacedeform_bind_exec(C, op); + else + return OPERATOR_CANCELLED; +} + +void OBJECT_OT_surfacedeform_bind(wmOperatorType *ot) +{ + /* identifiers */ + ot->name = "Surface Deform Bind"; + ot->description = "Bind mesh to target in surface deform modifier"; + ot->idname = "OBJECT_OT_surfacedeform_bind"; + + /* api callbacks */ + ot->poll = surfacedeform_bind_poll; + ot->invoke = surfacedeform_bind_invoke; + ot->exec = surfacedeform_bind_exec; + + /* flags */ + ot->flag = OPTYPE_REGISTER | OPTYPE_UNDO | OPTYPE_INTERNAL; + edit_modifier_properties(ot); +} diff --git a/source/blender/editors/object/object_ops.c b/source/blender/editors/object/object_ops.c index 7e7e1ef182c..5fe5a884354 100644 --- a/source/blender/editors/object/object_ops.c +++ b/source/blender/editors/object/object_ops.c @@ -255,6 +255,7 @@ void ED_operatortypes_object(void) WM_operatortype_append(OBJECT_OT_data_transfer); WM_operatortype_append(OBJECT_OT_datalayout_transfer); + WM_operatortype_append(OBJECT_OT_surfacedeform_bind); } void ED_operatormacros_object(void) diff --git a/source/blender/editors/physics/physics_ops.c b/source/blender/editors/physics/physics_ops.c index 0c907f19753..b1d708ebc07 100644 --- a/source/blender/editors/physics/physics_ops.c +++ b/source/blender/editors/physics/physics_ops.c @@ -138,13 +138,21 @@ static void keymap_particle(wmKeyConfig *keyconf) RNA_boolean_set(kmi->ptr, "unselected", true); /* Shift+LMB 
behavior first, so it has priority over KM_ANY item below. */ - kmi = WM_keymap_verify_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); + kmi = WM_keymap_add_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); RNA_boolean_set(kmi->ptr, "release_confirm", true); RNA_boolean_set(kmi->ptr, "use_planar_constraint", true); + RNA_boolean_set(kmi->ptr, "use_accurate", false); + + kmi = WM_keymap_add_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); + RNA_boolean_set(kmi->ptr, "release_confirm", true); + RNA_boolean_set(kmi->ptr, "use_planar_constraint", false); + RNA_boolean_set(kmi->ptr, "use_accurate", true); + /* Using KM_ANY here to allow holding modifiers before starting to transform. */ kmi = WM_keymap_add_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_ANY, 0); RNA_boolean_set(kmi->ptr, "release_confirm", true); RNA_boolean_set(kmi->ptr, "use_planar_constraint", false); + RNA_boolean_set(kmi->ptr, "use_accurate", false); WM_keymap_add_item(keymap, "PARTICLE_OT_brush_edit", LEFTMOUSE, KM_PRESS, 0, 0); WM_keymap_add_item(keymap, "PARTICLE_OT_brush_edit", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); diff --git a/source/blender/editors/render/render_opengl.c b/source/blender/editors/render/render_opengl.c index 9097432a251..1d0f433ba38 100644 --- a/source/blender/editors/render/render_opengl.c +++ b/source/blender/editors/render/render_opengl.c @@ -315,7 +315,7 @@ static void screen_opengl_render_doit(OGLRender *oglrender, RenderResult *rr) RE_render_result_rect_from_ibuf(rr, &scene->r, out, oglrender->view_id); IMB_freeImBuf(out); } - else if (gpd){ + else if (gpd) { /* If there are no strips, Grease Pencil still needs a buffer to draw on */ ImBuf *out = IMB_allocImBuf(oglrender->sizex, oglrender->sizey, 32, IB_rect); RE_render_result_rect_from_ibuf(rr, &scene->r, out, oglrender->view_id); @@ -715,7 +715,6 @@ static bool screen_opengl_render_init(bContext *C, wmOperator *op) 
oglrender->task_scheduler = task_scheduler; oglrender->task_pool = BLI_task_pool_create_background(task_scheduler, oglrender); - BLI_pool_set_num_threads(oglrender->task_pool, 1); } else { oglrender->task_scheduler = NULL; @@ -747,6 +746,23 @@ static void screen_opengl_render_end(bContext *C, OGLRender *oglrender) int i; if (oglrender->is_animation) { + /* Trickery part for movie output: + * + * We MUST write frames in an exact order, so we only let background + * thread to work on that, and main thread is simply waits for that + * thread to do all the dirty work. + * + * After this loop is done work_and_wait() will have nothing to do, + * so we don't run into wrong order of frames written to the stream. + */ + if (BKE_imtype_is_movie(scene->r.im_format.imtype)) { + BLI_mutex_lock(&oglrender->task_mutex); + while (oglrender->num_scheduled_frames > 0) { + BLI_condition_wait(&oglrender->task_condition, + &oglrender->task_mutex); + } + BLI_mutex_unlock(&oglrender->task_mutex); + } BLI_task_pool_work_and_wait(oglrender->task_pool); BLI_task_pool_free(oglrender->task_pool); /* Depending on various things we might or might not use global scheduler. */ @@ -886,14 +902,15 @@ static void write_result_func(TaskPool * __restrict pool, */ ReportList reports; BKE_reports_init(&reports, oglrender->reports->flag & ~RPT_PRINT); - /* Do actual save logic here, depending on the file format. */ + /* Do actual save logic here, depending on the file format. + * + * NOTE: We have to construct temporary scene with proper scene->r.cfra. + * This is because underlying calls do not use r.cfra but use scene + * for that. + */ + Scene tmp_scene = *scene; + tmp_scene.r.cfra = cfra; if (is_movie) { - /* We have to construct temporary scene with proper scene->r.cfra. - * This is because underlying calls do not use r.cfra but use scene - * for that. 
- */ - Scene tmp_scene = *scene; - tmp_scene.r.cfra = cfra; ok = RE_WriteRenderViewsMovie(&reports, rr, &tmp_scene, @@ -917,8 +934,8 @@ static void write_result_func(TaskPool * __restrict pool, true, NULL); - BKE_render_result_stamp_info(scene, scene->camera, rr, false); - ok = RE_WriteRenderViewsImage(NULL, rr, scene, true, name); + BKE_render_result_stamp_info(&tmp_scene, tmp_scene.camera, rr, false); + ok = RE_WriteRenderViewsImage(NULL, rr, &tmp_scene, true, name); if (!ok) { BKE_reportf(&reports, RPT_ERROR, diff --git a/source/blender/editors/sculpt_paint/paint_image_proj.c b/source/blender/editors/sculpt_paint/paint_image_proj.c index f5d115442c6..d0f1cc99b8d 100644 --- a/source/blender/editors/sculpt_paint/paint_image_proj.c +++ b/source/blender/editors/sculpt_paint/paint_image_proj.c @@ -5711,21 +5711,16 @@ static bool proj_paint_add_slot(bContext *C, wmOperator *op) /* successful creation of mtex layer, now create set */ if (mtex) { int type = MAP_COL; - int type_id = 0; + char imagename_buff[MAX_ID_NAME - 2]; + const char *imagename = DATA_("Diffuse Color"); if (op) { - int i; type = RNA_enum_get(op->ptr, "type"); - - for (i = 0; i < ARRAY_SIZE(layer_type_items); i++) { - if (layer_type_items[i].value == type) { - type_id = i; - break; - } - } + RNA_string_get(op->ptr, "name", imagename_buff); + imagename = imagename_buff; } - mtex->tex = BKE_texture_add(bmain, DATA_(layer_type_items[type_id].name)); + mtex->tex = BKE_texture_add(bmain, imagename); mtex->mapto = type; if (mtex->tex) { diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c index 84e98181dfb..44cc2720a32 100644 --- a/source/blender/editors/sculpt_paint/sculpt.c +++ b/source/blender/editors/sculpt_paint/sculpt.c @@ -5361,8 +5361,12 @@ static int sculpt_mode_toggle_exec(bContext *C, wmOperator *op) if (mmd) multires_force_update(ob); - if (flush_recalc || (ob->sculpt && ob->sculpt->bm)) + /* Always for now, so leaving sculpt mode always 
ensures scene is in + * a consistent state. + */ + if (true || flush_recalc || (ob->sculpt && ob->sculpt->bm)) { DAG_id_tag_update(&ob->id, OB_RECALC_DATA); + } if (me->flag & ME_SCULPT_DYNAMIC_TOPOLOGY) { /* Dynamic topology must be disabled before exiting sculpt diff --git a/source/blender/editors/space_clip/tracking_ops.c b/source/blender/editors/space_clip/tracking_ops.c index dc3aa3a5f48..8b9f515995a 100644 --- a/source/blender/editors/space_clip/tracking_ops.c +++ b/source/blender/editors/space_clip/tracking_ops.c @@ -1537,7 +1537,8 @@ static int join_tracks_exec(bContext *C, wmOperator *op) update_stabilization = true; if ((act_track->flag & TRACK_USE_2D_STAB) == 0) { act_track->flag |= TRACK_USE_2D_STAB; - } else { + } + else { stab->tot_track--; } BLI_assert(0 <= stab->tot_track); @@ -1546,7 +1547,8 @@ static int join_tracks_exec(bContext *C, wmOperator *op) update_stabilization = true; if ((act_track->flag & TRACK_USE_2D_STAB_ROT) == 0) { act_track->flag |= TRACK_USE_2D_STAB_ROT; - } else { + } + else { stab->tot_rot_track--; } BLI_assert(0 <= stab->tot_rot_track); diff --git a/source/blender/editors/space_nla/nla_draw.c b/source/blender/editors/space_nla/nla_draw.c index 5b3c062e16d..93dcdbb5c02 100644 --- a/source/blender/editors/space_nla/nla_draw.c +++ b/source/blender/editors/space_nla/nla_draw.c @@ -290,7 +290,8 @@ static void nla_draw_strip_curves(NlaStrip *strip, float yminc, float ymaxc) * - min y-val is yminc, max is y-maxc, so clamp in those regions */ for (cfra = strip->start; cfra <= strip->end; cfra += 1.0f) { - float y = evaluate_fcurve(fcu, cfra); // assume this to be in 0-1 range + float y = evaluate_fcurve(fcu, cfra); + CLAMP(y, 0.0f, 1.0f); glVertex2f(cfra, ((y * yheight) + yminc)); } glEnd(); // GL_LINE_STRIP diff --git a/source/blender/editors/space_node/node_edit.c b/source/blender/editors/space_node/node_edit.c index ffe510016ff..fdfe316f5ed 100644 --- a/source/blender/editors/space_node/node_edit.c +++ 
b/source/blender/editors/space_node/node_edit.c @@ -582,7 +582,7 @@ void snode_set_context(const bContext *C) } } - if (snode->nodetree != ntree || snode->id != id || snode->from != from) { + if (snode->nodetree != ntree || snode->id != id || snode->from != from || snode->treepath.last == NULL) { ED_node_tree_start(snode, ntree, id, from); } @@ -1069,12 +1069,9 @@ int node_find_indicated_socket(SpaceNode *snode, bNode **nodep, bNodeSocket **so /* check if we click in a socket */ for (node = snode->edittree->nodes.first; node; node = node->next) { - - rect.xmin = cursor[0] - (NODE_SOCKSIZE + 4); - rect.ymin = cursor[1] - (NODE_SOCKSIZE + 4); - rect.xmax = cursor[0] + (NODE_SOCKSIZE + 4); - rect.ymax = cursor[1] + (NODE_SOCKSIZE + 4); - + + BLI_rctf_init_pt_radius(&rect, cursor, NODE_SOCKSIZE + 4); + if (!(node->flag & NODE_HIDDEN)) { /* extra padding inside and out - allow dragging on the text areas too */ if (in_out == SOCK_IN) { diff --git a/source/blender/editors/space_outliner/outliner_draw.c b/source/blender/editors/space_outliner/outliner_draw.c index 99242fd12f9..684a1f9fd67 100644 --- a/source/blender/editors/space_outliner/outliner_draw.c +++ b/source/blender/editors/space_outliner/outliner_draw.c @@ -1126,6 +1126,7 @@ static void tselem_draw_icon(uiBlock *block, int xmax, float x, float y, TreeSto case eModifierType_Cast: UI_icon_draw(x, y, ICON_MOD_CAST); break; case eModifierType_MeshDeform: + case eModifierType_SurfaceDeform: UI_icon_draw(x, y, ICON_MOD_MESHDEFORM); break; case eModifierType_Bevel: UI_icon_draw(x, y, ICON_MOD_BEVEL); break; diff --git a/source/blender/editors/space_sequencer/sequencer_draw.c b/source/blender/editors/space_sequencer/sequencer_draw.c index e1768e4aedc..70a6e6d83cb 100644 --- a/source/blender/editors/space_sequencer/sequencer_draw.c +++ b/source/blender/editors/space_sequencer/sequencer_draw.c @@ -545,7 +545,8 @@ static void draw_seq_text(View2D *v2d, SpaceSeq *sseq, Sequence *seq, float x1, if ((sseq->flag & 
SEQ_ALL_WAVEFORMS) || (seq->flag & SEQ_AUDIO_DRAW_WAVEFORM)) { str[0] = 0; str_len = 0; - } else if (seq->sound) { + } + else if (seq->sound) { str_len = BLI_snprintf(str, sizeof(str), "%s: %s | %d", name, seq->sound->name, seq->len); } diff --git a/source/blender/editors/space_view3d/drawvolume.c b/source/blender/editors/space_view3d/drawvolume.c index 27ecbf83db5..182dc214f8e 100644 --- a/source/blender/editors/space_view3d/drawvolume.c +++ b/source/blender/editors/space_view3d/drawvolume.c @@ -774,8 +774,8 @@ void draw_smoke_velocity(SmokeDomainSettings *domain, float viewnormal[3]) float min[3] = { domain->p0[0] - domain->cell_size[0] * domain->adapt_res, - domain->p0[1] - domain->cell_size[1] * domain->adapt_res, - domain->p0[2] - domain->cell_size[2] * domain->adapt_res, + domain->p0[1] - domain->cell_size[1] * domain->adapt_res, + domain->p0[2] - domain->cell_size[2] * domain->adapt_res, }; int num_points_v[3] = { diff --git a/source/blender/editors/space_view3d/space_view3d.c b/source/blender/editors/space_view3d/space_view3d.c index 964f4bcdd9c..b8228c63209 100644 --- a/source/blender/editors/space_view3d/space_view3d.c +++ b/source/blender/editors/space_view3d/space_view3d.c @@ -180,8 +180,8 @@ bool ED_view3d_context_user_region(bContext *C, View3D **r_v3d, ARegion **r_ar) View3D *v3d = (View3D *)sa->spacedata.first; if (ar) { - RegionView3D *rv3d = ar->regiondata; - if (rv3d && (rv3d->viewlock & RV3D_LOCKED) == 0) { + RegionView3D *rv3d; + if ((ar->regiontype == RGN_TYPE_WINDOW) && (rv3d = ar->regiondata) && (rv3d->viewlock & RV3D_LOCKED) == 0) { *r_v3d = v3d; *r_ar = ar; return true; @@ -869,6 +869,7 @@ static void view3d_main_region_listener(bScreen *sc, ScrArea *sa, ARegion *ar, w case ND_CONSTRAINT: case ND_KEYS: case ND_PARTICLE: + case ND_POINTCACHE: case ND_LOD: ED_region_tag_redraw(ar); break; diff --git a/source/blender/editors/space_view3d/view3d_draw.c b/source/blender/editors/space_view3d/view3d_draw.c index f23e587e55d..0c5cf1bd936 100644 
--- a/source/blender/editors/space_view3d/view3d_draw.c +++ b/source/blender/editors/space_view3d/view3d_draw.c @@ -2955,7 +2955,7 @@ struct RV3DMatrixStore { float pixsize; }; -void *ED_view3d_mats_rv3d_backup(struct RegionView3D *rv3d) +struct RV3DMatrixStore *ED_view3d_mats_rv3d_backup(struct RegionView3D *rv3d) { struct RV3DMatrixStore *rv3dmat = MEM_mallocN(sizeof(*rv3dmat), __func__); copy_m4_m4(rv3dmat->winmat, rv3d->winmat); @@ -2968,9 +2968,8 @@ void *ED_view3d_mats_rv3d_backup(struct RegionView3D *rv3d) return (void *)rv3dmat; } -void ED_view3d_mats_rv3d_restore(struct RegionView3D *rv3d, void *rv3dmat_pt) +void ED_view3d_mats_rv3d_restore(struct RegionView3D *rv3d, struct RV3DMatrixStore *rv3dmat) { - struct RV3DMatrixStore *rv3dmat = rv3dmat_pt; copy_m4_m4(rv3d->winmat, rv3dmat->winmat); copy_m4_m4(rv3d->viewmat, rv3dmat->viewmat); copy_m4_m4(rv3d->persmat, rv3dmat->persmat); diff --git a/source/blender/editors/space_view3d/view3d_edit.c b/source/blender/editors/space_view3d/view3d_edit.c index 2b53eb71d99..f07727f8118 100644 --- a/source/blender/editors/space_view3d/view3d_edit.c +++ b/source/blender/editors/space_view3d/view3d_edit.c @@ -90,19 +90,6 @@ bool ED_view3d_offset_lock_check(const View3D *v3d, const RegionView3D *rv3d) return (rv3d->persp != RV3D_CAMOB) && (v3d->ob_centre_cursor || v3d->ob_centre); } -static bool view3d_operator_offset_lock_check(bContext *C, wmOperator *op) -{ - View3D *v3d = CTX_wm_view3d(C); - RegionView3D *rv3d = CTX_wm_region_view3d(C); - if (ED_view3d_offset_lock_check(v3d, rv3d)) { - BKE_report(op->reports, RPT_WARNING, "View offset is locked"); - return true; - } - else { - return false; - } -} - /* ********************** view3d_edit: view manipulations ********************* */ /** @@ -2596,6 +2583,19 @@ void VIEW3D_OT_zoom(wmOperatorType *ot) /* ************************ viewdolly ******************************** */ +static bool viewdolly_offset_lock_check(bContext *C, wmOperator *op) +{ + View3D *v3d = 
CTX_wm_view3d(C); + RegionView3D *rv3d = CTX_wm_region_view3d(C); + if (ED_view3d_offset_lock_check(v3d, rv3d)) { + BKE_report(op->reports, RPT_WARNING, "Cannot dolly when the view offset is locked"); + return true; + } + else { + return false; + } +} + static void view_dolly_mouseloc(ARegion *ar, float orig_ofs[3], float dvec[3], float dfac) { RegionView3D *rv3d = ar->regiondata; @@ -2746,7 +2746,7 @@ static int viewdolly_invoke(bContext *C, wmOperator *op, const wmEvent *event) { ViewOpsData *vod; - if (view3d_operator_offset_lock_check(C, op)) + if (viewdolly_offset_lock_check(C, op)) return OPERATOR_CANCELLED; /* makes op->customdata */ @@ -4364,41 +4364,24 @@ static EnumPropertyItem prop_view_pan_items[] = { {0, NULL, 0, NULL, NULL} }; -static int viewpan_exec(bContext *C, wmOperator *op) +static int viewpan_invoke(bContext *C, wmOperator *op, const wmEvent *event) { - ScrArea *sa = CTX_wm_area(C); - ARegion *ar = CTX_wm_region(C); - View3D *v3d = CTX_wm_view3d(C); - RegionView3D *rv3d = CTX_wm_region_view3d(C); - float vec[3]; - const float co_zero[3] = {0.0f}; - float mval_f[2] = {0.0f, 0.0f}; - float zfac; - int pandir; + int x = 0, y = 0; + int pandir = RNA_enum_get(op->ptr, "type"); - if (view3d_operator_offset_lock_check(C, op)) - return OPERATOR_CANCELLED; + if (pandir == V3D_VIEW_PANRIGHT) { x = -32; } + else if (pandir == V3D_VIEW_PANLEFT) { x = 32; } + else if (pandir == V3D_VIEW_PANUP) { y = -25; } + else if (pandir == V3D_VIEW_PANDOWN) { y = 25; } - pandir = RNA_enum_get(op->ptr, "type"); - - ED_view3d_camera_lock_init(v3d, rv3d); - - zfac = ED_view3d_calc_zfac(rv3d, co_zero, NULL); - if (pandir == V3D_VIEW_PANRIGHT) { mval_f[0] = -32.0f; } - else if (pandir == V3D_VIEW_PANLEFT) { mval_f[0] = 32.0f; } - else if (pandir == V3D_VIEW_PANUP) { mval_f[1] = -25.0f; } - else if (pandir == V3D_VIEW_PANDOWN) { mval_f[1] = 25.0f; } - ED_view3d_win_to_delta(ar, mval_f, vec, zfac); - add_v3_v3(rv3d->ofs, vec); - - if (rv3d->viewlock & RV3D_BOXVIEW) - 
view3d_boxview_sync(sa, ar); - - ED_view3d_depth_tag_update(rv3d); + viewops_data_alloc(C, op); + viewops_data_create(C, op, event); + ViewOpsData *vod = op->customdata; - ED_view3d_camera_lock_sync(v3d, rv3d); + viewmove_apply(vod, vod->oldx + x, vod->oldy + y); - ED_region_tag_redraw(ar); + ED_view3d_depth_tag_update(vod->rv3d); + viewops_data_free(C, op); return OPERATOR_FINISHED; } @@ -4411,7 +4394,7 @@ void VIEW3D_OT_view_pan(wmOperatorType *ot) ot->idname = "VIEW3D_OT_view_pan"; /* api callbacks */ - ot->exec = viewpan_exec; + ot->invoke = viewpan_invoke; ot->poll = ED_operator_region_view3d_active; /* flags */ @@ -4798,6 +4781,7 @@ static int manipulator_invoke(bContext *C, wmOperator *op, const wmEvent *event) void VIEW3D_OT_manipulator(wmOperatorType *ot) { + PropertyRNA *prop; /* identifiers */ ot->name = "3D Manipulator"; @@ -4812,8 +4796,9 @@ void VIEW3D_OT_manipulator(wmOperatorType *ot) /* properties to pass to transform */ Transform_Properties(ot, P_CONSTRAINT); - RNA_def_boolean(ot->srna, "use_planar_constraint", false, "Planar Constraint", "Limit the transformation to the " - "two axes that have not been clicked (translate/scale only)"); + prop = RNA_def_boolean(ot->srna, "use_planar_constraint", false, "Planar Constraint", "Limit the transformation to the " + "two axes that have not been clicked (translate/scale only)"); + RNA_def_property_flag(prop, PROP_SKIP_SAVE | PROP_HIDDEN); } static int enable_manipulator_invoke(bContext *C, wmOperator *op, const wmEvent *UNUSED(event)) @@ -4902,11 +4887,7 @@ static float view_autodist_depth_margin(ARegion *ar, const int mval[2], int marg rect.ymax = mval[1] + 1; } else { - rect.xmax = mval[0] + margin; - rect.ymax = mval[1] + margin; - - rect.xmin = mval[0] - margin; - rect.ymin = mval[1] - margin; + BLI_rcti_init_pt_radius(&rect, mval, margin); } view3d_update_depths_rect(ar, &depth_temp, &rect); diff --git a/source/blender/editors/space_view3d/view3d_intern.h 
b/source/blender/editors/space_view3d/view3d_intern.h index b11f42bcfef..87b3d95cd4e 100644 --- a/source/blender/editors/space_view3d/view3d_intern.h +++ b/source/blender/editors/space_view3d/view3d_intern.h @@ -241,7 +241,7 @@ void ED_view3d_smooth_view_force_finish( struct bContext *C, struct View3D *v3d, struct ARegion *ar); -void view3d_winmatrix_set(ARegion *ar, const View3D *v3d, const rctf *rect); +void view3d_winmatrix_set(ARegion *ar, const View3D *v3d, const rcti *rect); void view3d_viewmatrix_set(Scene *scene, const View3D *v3d, RegionView3D *rv3d); void fly_modal_keymap(struct wmKeyConfig *keyconf); diff --git a/source/blender/editors/space_view3d/view3d_ops.c b/source/blender/editors/space_view3d/view3d_ops.c index 0fa6841fe27..d71639c35d2 100644 --- a/source/blender/editors/space_view3d/view3d_ops.c +++ b/source/blender/editors/space_view3d/view3d_ops.c @@ -241,13 +241,21 @@ void view3d_keymap(wmKeyConfig *keyconf) keymap = WM_keymap_find(keyconf, "3D View", SPACE_VIEW3D, 0); /* Shift+LMB behavior first, so it has priority over KM_ANY item below. */ - kmi = WM_keymap_verify_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); + kmi = WM_keymap_add_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); RNA_boolean_set(kmi->ptr, "release_confirm", true); RNA_boolean_set(kmi->ptr, "use_planar_constraint", true); + RNA_boolean_set(kmi->ptr, "use_accurate", false); + + kmi = WM_keymap_add_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_SHIFT, 0); + RNA_boolean_set(kmi->ptr, "release_confirm", true); + RNA_boolean_set(kmi->ptr, "use_planar_constraint", false); + RNA_boolean_set(kmi->ptr, "use_accurate", true); + /* Using KM_ANY here to allow holding modifiers before starting to transform. 
*/ kmi = WM_keymap_add_item(keymap, "VIEW3D_OT_manipulator", LEFTMOUSE, KM_PRESS, KM_ANY, 0); RNA_boolean_set(kmi->ptr, "release_confirm", true); RNA_boolean_set(kmi->ptr, "use_planar_constraint", false); + RNA_boolean_set(kmi->ptr, "use_accurate", false); WM_keymap_verify_item(keymap, "VIEW3D_OT_cursor3d", ACTIONMOUSE, KM_PRESS, 0, 0); diff --git a/source/blender/editors/space_view3d/view3d_select.c b/source/blender/editors/space_view3d/view3d_select.c index 3239d07553f..0c0a7df8f84 100644 --- a/source/blender/editors/space_view3d/view3d_select.c +++ b/source/blender/editors/space_view3d/view3d_select.c @@ -96,8 +96,12 @@ #include "GPU_draw.h" +#include "GPU_select.h" + #include "view3d_intern.h" /* own include */ +// #include "PIL_time_utildefines.h" + float ED_view3d_select_dist_px(void) { return 75.0f * U.pixelsize; @@ -1087,7 +1091,9 @@ static void deselectall_except(Scene *scene, Base *b) /* deselect all except b } } -static Base *object_mouse_select_menu(bContext *C, ViewContext *vc, unsigned int *buffer, int hits, const int mval[2], short toggle) +static Base *object_mouse_select_menu( + bContext *C, ViewContext *vc, unsigned int *buffer, int hits, + const int mval[2], bool toggle) { short baseCount = 0; bool ok; @@ -1178,19 +1184,19 @@ static bool selectbuffer_has_bones(const unsigned int *buffer, const unsigned in } /* utility function for mixed_bones_object_selectbuffer */ -static short selectbuffer_ret_hits_15(unsigned int *UNUSED(buffer), const short hits15) +static int selectbuffer_ret_hits_15(unsigned int *UNUSED(buffer), const int hits15) { return hits15; } -static short selectbuffer_ret_hits_9(unsigned int *buffer, const short hits15, const short hits9) +static int selectbuffer_ret_hits_9(unsigned int *buffer, const int hits15, const int hits9) { const int offs = 4 * hits15; memcpy(buffer, buffer + offs, 4 * hits9 * sizeof(unsigned int)); return hits9; } -static short selectbuffer_ret_hits_5(unsigned int *buffer, const short hits15, const short 
hits9, const short hits5) +static int selectbuffer_ret_hits_5(unsigned int *buffer, const int hits15, const int hits9, const int hits5) { const int offs = 4 * hits15 + 4 * hits9; memcpy(buffer, buffer + offs, 4 * hits5 * sizeof(unsigned int)); @@ -1199,14 +1205,13 @@ static short selectbuffer_ret_hits_5(unsigned int *buffer, const short hits15, c /* we want a select buffer with bones, if there are... */ /* so check three selection levels and compare */ -static short mixed_bones_object_selectbuffer( +static int mixed_bones_object_selectbuffer( ViewContext *vc, unsigned int *buffer, const int mval[2], bool use_cycle, bool enumerate, bool *r_do_nearest) { rcti rect; - int offs; - short hits15, hits9 = 0, hits5 = 0; + int hits15, hits9 = 0, hits5 = 0; bool has_bones15 = false, has_bones9 = false, has_bones5 = false; static int last_mval[2] = {-100, -100}; bool do_nearest = false; @@ -1234,44 +1239,57 @@ static short mixed_bones_object_selectbuffer( do_nearest = do_nearest && !enumerate; - BLI_rcti_init(&rect, mval[0] - 14, mval[0] + 14, mval[1] - 14, mval[1] + 14); - hits15 = view3d_opengl_select(vc, buffer, MAXPICKBUF, &rect, do_nearest); + const int select_mode = (do_nearest ? 
VIEW3D_SELECT_PICK_NEAREST : VIEW3D_SELECT_PICK_ALL); + int hits = 0; + + /* we _must_ end cache before return, use 'goto finally' */ + GPU_select_cache_begin(); + + BLI_rcti_init_pt_radius(&rect, mval, 14); + hits15 = view3d_opengl_select(vc, buffer, MAXPICKBUF, &rect, select_mode); if (hits15 == 1) { - return selectbuffer_ret_hits_15(buffer, hits15); + hits = selectbuffer_ret_hits_15(buffer, hits15); + goto finally; } else if (hits15 > 0) { + int offs; has_bones15 = selectbuffer_has_bones(buffer, hits15); offs = 4 * hits15; - BLI_rcti_init(&rect, mval[0] - 9, mval[0] + 9, mval[1] - 9, mval[1] + 9); - hits9 = view3d_opengl_select(vc, buffer + offs, MAXPICKBUF - offs, &rect, do_nearest); + BLI_rcti_init_pt_radius(&rect, mval, 9); + hits9 = view3d_opengl_select(vc, buffer + offs, MAXPICKBUF - offs, &rect, select_mode); if (hits9 == 1) { - return selectbuffer_ret_hits_9(buffer, hits15, hits9); + hits = selectbuffer_ret_hits_9(buffer, hits15, hits9); + goto finally; } else if (hits9 > 0) { has_bones9 = selectbuffer_has_bones(buffer + offs, hits9); offs += 4 * hits9; - BLI_rcti_init(&rect, mval[0] - 5, mval[0] + 5, mval[1] - 5, mval[1] + 5); - hits5 = view3d_opengl_select(vc, buffer + offs, MAXPICKBUF - offs, &rect, do_nearest); + BLI_rcti_init_pt_radius(&rect, mval, 5); + hits5 = view3d_opengl_select(vc, buffer + offs, MAXPICKBUF - offs, &rect, select_mode); if (hits5 == 1) { - return selectbuffer_ret_hits_5(buffer, hits15, hits9, hits5); + hits = selectbuffer_ret_hits_5(buffer, hits15, hits9, hits5); + goto finally; } else if (hits5 > 0) { has_bones5 = selectbuffer_has_bones(buffer + offs, hits5); } } - if (has_bones5) return selectbuffer_ret_hits_5(buffer, hits15, hits9, hits5); - else if (has_bones9) return selectbuffer_ret_hits_9(buffer, hits15, hits9); - else if (has_bones15) return selectbuffer_ret_hits_15(buffer, hits15); - - if (hits5 > 0) return selectbuffer_ret_hits_5(buffer, hits15, hits9, hits5); - else if (hits9 > 0) return selectbuffer_ret_hits_9(buffer, 
hits15, hits9); - else return selectbuffer_ret_hits_15(buffer, hits15); + if (has_bones5) { hits = selectbuffer_ret_hits_5(buffer, hits15, hits9, hits5); goto finally; } + else if (has_bones9) { hits = selectbuffer_ret_hits_9(buffer, hits15, hits9); goto finally; } + else if (has_bones15) { hits = selectbuffer_ret_hits_15(buffer, hits15); goto finally; } + + if (hits5 > 0) { hits = selectbuffer_ret_hits_5(buffer, hits15, hits9, hits5); goto finally; } + else if (hits9 > 0) { hits = selectbuffer_ret_hits_9(buffer, hits15, hits9); goto finally; } + else { hits = selectbuffer_ret_hits_15(buffer, hits15); goto finally; } } - - return 0; + +finally: + GPU_select_cache_end(); + + return hits; } /* returns basact */ @@ -1412,7 +1430,7 @@ static bool ed_object_select_pick( bool is_obedit; float dist = ED_view3d_select_dist_px() * 1.3333f; bool retval = false; - short hits; + int hits; const float mval_fl[2] = {(float)mval[0], (float)mval[1]}; @@ -1464,10 +1482,13 @@ static bool ed_object_select_pick( unsigned int buffer[MAXPICKBUF]; bool do_nearest; + // TIMEIT_START(select_time); + /* if objects have posemode set, the bones are in the same selection buffer */ - hits = mixed_bones_object_selectbuffer(&vc, buffer, mval, true, enumerate, &do_nearest); - + + // TIMEIT_END(select_time); + if (hits > 0) { /* note: bundles are handling in the same way as bones */ const bool has_bones = selectbuffer_has_bones(buffer, hits); @@ -1904,9 +1925,9 @@ static int do_meta_box_select(ViewContext *vc, rcti *rect, bool select, bool ext int a; unsigned int buffer[MAXPICKBUF]; - short hits; + int hits; - hits = view3d_opengl_select(vc, buffer, MAXPICKBUF, rect, false); + hits = view3d_opengl_select(vc, buffer, MAXPICKBUF, rect, VIEW3D_SELECT_ALL); if (extend == false && select) BKE_mball_deselect_all(mb); @@ -1938,9 +1959,9 @@ static int do_armature_box_select(ViewContext *vc, rcti *rect, bool select, bool int a; unsigned int buffer[MAXPICKBUF]; - short hits; + int hits; - hits = 
view3d_opengl_select(vc, buffer, MAXPICKBUF, rect, false); + hits = view3d_opengl_select(vc, buffer, MAXPICKBUF, rect, VIEW3D_SELECT_ALL); /* clear flag we use to detect point was affected */ for (ebone = arm->edbo->first; ebone; ebone = ebone->next) @@ -2013,7 +2034,7 @@ static int do_object_pose_box_select(bContext *C, ViewContext *vc, rcti *rect, b int bone_only; int bone_selected = 0; int totobj = MAXPICKBUF; /* XXX solve later */ - short hits; + int hits; if ((ob) && (ob->mode & OB_MODE_POSE)) bone_only = 1; @@ -2037,7 +2058,7 @@ static int do_object_pose_box_select(bContext *C, ViewContext *vc, rcti *rect, b /* selection buffer now has bones potentially too, so we add MAXPICKBUF */ vbuffer = MEM_mallocN(4 * (totobj + MAXPICKELEMS) * sizeof(unsigned int), "selection buffer"); - hits = view3d_opengl_select(vc, vbuffer, 4 * (totobj + MAXPICKELEMS), rect, false); + hits = view3d_opengl_select(vc, vbuffer, 4 * (totobj + MAXPICKELEMS), rect, VIEW3D_SELECT_ALL); /* * LOGIC NOTES (theeth): * The buffer and ListBase have the same relative order, which makes the selection @@ -2577,7 +2598,7 @@ static void lattice_circle_select(ViewContext *vc, const bool select, const int /* NOTE: pose-bone case is copied from editbone case... 
*/ -static short pchan_circle_doSelectJoint(void *userData, bPoseChannel *pchan, const float screen_co[2]) +static bool pchan_circle_doSelectJoint(void *userData, bPoseChannel *pchan, const float screen_co[2]) { CircleSelectUserData *data = userData; @@ -2655,7 +2676,7 @@ static void pose_circle_select(ViewContext *vc, const bool select, const int mva } } -static short armature_circle_doSelectJoint(void *userData, EditBone *ebone, const float screen_co[2], short head) +static bool armature_circle_doSelectJoint(void *userData, EditBone *ebone, const float screen_co[2], bool head) { CircleSelectUserData *data = userData; diff --git a/source/blender/editors/space_view3d/view3d_view.c b/source/blender/editors/space_view3d/view3d_view.c index 8582952d1a0..9d1a3633786 100644 --- a/source/blender/editors/space_view3d/view3d_view.c +++ b/source/blender/editors/space_view3d/view3d_view.c @@ -908,7 +908,7 @@ void ED_view3d_polygon_offset(const RegionView3D *rv3d, const float dist) /** * \param rect optional for picking (can be NULL). */ -void view3d_winmatrix_set(ARegion *ar, const View3D *v3d, const rctf *rect) +void view3d_winmatrix_set(ARegion *ar, const View3D *v3d, const rcti *rect) { RegionView3D *rv3d = ar->regiondata; rctf viewplane; @@ -1170,29 +1170,64 @@ static void view3d_select_loop(ViewContext *vc, Scene *scene, View3D *v3d, ARegi * * \note (vc->obedit == NULL) can be set to explicitly skip edit-object selection. 
*/ -short view3d_opengl_select(ViewContext *vc, unsigned int *buffer, unsigned int bufsize, const rcti *input, bool do_nearest) +int view3d_opengl_select( + ViewContext *vc, unsigned int *buffer, unsigned int bufsize, const rcti *input, + int select_mode) { Scene *scene = vc->scene; View3D *v3d = vc->v3d; ARegion *ar = vc->ar; - rctf rect; - short hits; + rcti rect; + int hits; const bool use_obedit_skip = (scene->obedit != NULL) && (vc->obedit == NULL); - const bool do_passes = do_nearest && GPU_select_query_check_active(); + const bool is_pick_select = (U.gpu_select_pick_deph != 0); + const bool do_passes = ( + (is_pick_select == false) && + (select_mode == VIEW3D_SELECT_PICK_NEAREST) && + GPU_select_query_check_active()); + + char gpu_select_mode; - G.f |= G_PICKSEL; - /* case not a border select */ if (input->xmin == input->xmax) { - rect.xmin = input->xmin - 12; /* seems to be default value for bones only now */ - rect.xmax = input->xmin + 12; - rect.ymin = input->ymin - 12; - rect.ymax = input->ymin + 12; + /* seems to be default value for bones only now */ + BLI_rcti_init_pt_radius(&rect, (const int[2]){input->xmin, input->ymin}, 12); } else { - BLI_rctf_rcti_copy(&rect, input); + rect = *input; } - + + if (is_pick_select) { + if (is_pick_select && select_mode == VIEW3D_SELECT_PICK_NEAREST) { + gpu_select_mode = GPU_SELECT_PICK_NEAREST; + } + else if (is_pick_select && select_mode == VIEW3D_SELECT_PICK_ALL) { + gpu_select_mode = GPU_SELECT_PICK_ALL; + } + else { + gpu_select_mode = GPU_SELECT_ALL; + } + } + else { + if (do_passes) { + gpu_select_mode = GPU_SELECT_NEAREST_FIRST_PASS; + } + else { + gpu_select_mode = GPU_SELECT_ALL; + } + } + + /* Re-use cache (rect must be smaller then the cached) + * other context is assumed to be unchanged */ + if (GPU_select_is_cached()) { + GPU_select_begin(buffer, bufsize, &rect, gpu_select_mode, 0); + GPU_select_cache_load_id(); + hits = GPU_select_end(); + goto finally; + } + + G.f |= G_PICKSEL; + 
view3d_winmatrix_set(ar, v3d, &rect); mul_m4_m4m4(vc->rv3d->persmat, vc->rv3d->winmat, vc->rv3d->viewmat); @@ -1204,10 +1239,7 @@ short view3d_opengl_select(ViewContext *vc, unsigned int *buffer, unsigned int b if (vc->rv3d->rflag & RV3D_CLIPPING) ED_view3d_clipping_set(vc->rv3d); - if (do_passes) - GPU_select_begin(buffer, bufsize, &rect, GPU_SELECT_NEAREST_FIRST_PASS, 0); - else - GPU_select_begin(buffer, bufsize, &rect, GPU_SELECT_ALL, 0); + GPU_select_begin(buffer, bufsize, &rect, gpu_select_mode, 0); view3d_select_loop(vc, scene, v3d, ar, use_obedit_skip); @@ -1233,7 +1265,8 @@ short view3d_opengl_select(ViewContext *vc, unsigned int *buffer, unsigned int b if (vc->rv3d->rflag & RV3D_CLIPPING) ED_view3d_clipping_disable(); - + +finally: if (hits < 0) printf("Too many objects in select buffer\n"); /* XXX make error message */ return hits; diff --git a/source/blender/editors/transform/transform.c b/source/blender/editors/transform/transform.c index 1916f9b4dab..7d9063c3285 100644 --- a/source/blender/editors/transform/transform.c +++ b/source/blender/editors/transform/transform.c @@ -2176,7 +2176,14 @@ bool initTransform(bContext *C, TransInfo *t, wmOperator *op, const wmEvent *eve calculateCenter(t); if (event) { - initMouseInput(t, &t->mouse, t->center2d, event->mval, event->shift); + /* Initialize accurate transform to settings requested by keymap. 
*/ + bool use_accurate = false; + if ((prop = RNA_struct_find_property(op->ptr, "use_accurate")) && RNA_property_is_set(op->ptr, prop)) { + if (RNA_property_boolean_get(op->ptr, prop)) { + use_accurate = true; + } + } + initMouseInput(t, &t->mouse, t->center2d, event->mval, use_accurate); } switch (mode) { diff --git a/source/blender/editors/transform/transform_manipulator.c b/source/blender/editors/transform/transform_manipulator.c index e141724f2df..0a984d90ae3 100644 --- a/source/blender/editors/transform/transform_manipulator.c +++ b/source/blender/editors/transform/transform_manipulator.c @@ -1724,14 +1724,14 @@ static int manipulator_selectbuf(ScrArea *sa, ARegion *ar, const int mval[2], fl { View3D *v3d = sa->spacedata.first; RegionView3D *rv3d = ar->regiondata; - rctf rect, selrect; + rcti rect; GLuint buffer[64]; // max 4 items per select, so large enuf short hits; const bool is_picksel = true; const bool do_passes = GPU_select_query_check_active(); /* XXX check a bit later on this... 
(ton) */ - extern void view3d_winmatrix_set(ARegion *ar, View3D *v3d, rctf *rect); + extern void view3d_winmatrix_set(ARegion *ar, View3D *v3d, const rcti *rect); /* when looking through a selected camera, the manipulator can be at the * exact same position as the view, skip so we don't break selection */ @@ -1743,15 +1743,13 @@ static int manipulator_selectbuf(ScrArea *sa, ARegion *ar, const int mval[2], fl rect.ymin = mval[1] - hotspot; rect.ymax = mval[1] + hotspot; - selrect = rect; - view3d_winmatrix_set(ar, v3d, &rect); mul_m4_m4m4(rv3d->persmat, rv3d->winmat, rv3d->viewmat); if (do_passes) - GPU_select_begin(buffer, 64, &selrect, GPU_SELECT_NEAREST_FIRST_PASS, 0); + GPU_select_begin(buffer, 64, &rect, GPU_SELECT_NEAREST_FIRST_PASS, 0); else - GPU_select_begin(buffer, 64, &selrect, GPU_SELECT_ALL, 0); + GPU_select_begin(buffer, 64, &rect, GPU_SELECT_ALL, 0); /* do the drawing */ if (v3d->twtype & V3D_MANIP_ROTATE) { @@ -1766,7 +1764,7 @@ static int manipulator_selectbuf(ScrArea *sa, ARegion *ar, const int mval[2], fl hits = GPU_select_end(); if (do_passes) { - GPU_select_begin(buffer, 64, &selrect, GPU_SELECT_NEAREST_SECOND_PASS, hits); + GPU_select_begin(buffer, 64, &rect, GPU_SELECT_NEAREST_SECOND_PASS, hits); /* do the drawing */ if (v3d->twtype & V3D_MANIP_ROTATE) { @@ -1826,6 +1824,23 @@ static int manipulator_selectbuf(ScrArea *sa, ARegion *ar, const int mval[2], fl return 0; } +static const char *manipulator_get_operator_name(int man_val) +{ + if (man_val & MAN_TRANS_C) { + return "TRANSFORM_OT_translate"; + } + else if (man_val == MAN_ROT_T) { + return "TRANSFORM_OT_trackball"; + } + else if (man_val & MAN_ROT_C) { + return "TRANSFORM_OT_rotate"; + } + else if (man_val & MAN_SCALE_C) { + return "TRANSFORM_OT_resize"; + } + + return NULL; +} /* return 0; nothing happened */ int BIF_do_manipulator(bContext *C, const struct wmEvent *event, wmOperator *op) @@ -1846,11 +1861,24 @@ int BIF_do_manipulator(bContext *C, const struct wmEvent *event, wmOperator 
*op) // find the hotspots first test narrow hotspot val = manipulator_selectbuf(sa, ar, event->mval, 0.5f * (float)U.tw_hotspot); if (val) { + wmOperatorType *ot; + PointerRNA props_ptr; + PropertyRNA *prop; + const char *opname; // drawflags still global, for drawing call above drawflags = manipulator_selectbuf(sa, ar, event->mval, 0.2f * (float)U.tw_hotspot); if (drawflags == 0) drawflags = val; + /* Planar constraint doesn't make sense for rotation, give other keymaps a chance */ + if ((drawflags & MAN_ROT_C) && use_planar) { + return 0; + } + + opname = manipulator_get_operator_name(drawflags); + ot = WM_operatortype_find(opname, true); + WM_operator_properties_create_ptr(&props_ptr, ot); + if (drawflags & MAN_TRANS_C) { switch (drawflags) { case MAN_TRANS_C: @@ -1880,8 +1908,7 @@ int BIF_do_manipulator(bContext *C, const struct wmEvent *event, wmOperator *op) constraint_axis[2] = 1; break; } - RNA_boolean_set_array(op->ptr, "constraint_axis", constraint_axis); - WM_operator_name_call(C, "TRANSFORM_OT_translate", WM_OP_INVOKE_DEFAULT, op->ptr); + RNA_boolean_set_array(&props_ptr, "constraint_axis", constraint_axis); } else if (drawflags & MAN_SCALE_C) { switch (drawflags) { @@ -1910,22 +1937,10 @@ int BIF_do_manipulator(bContext *C, const struct wmEvent *event, wmOperator *op) constraint_axis[2] = 1; break; } - RNA_boolean_set_array(op->ptr, "constraint_axis", constraint_axis); - WM_operator_name_call(C, "TRANSFORM_OT_resize", WM_OP_INVOKE_DEFAULT, op->ptr); + RNA_boolean_set_array(&props_ptr, "constraint_axis", constraint_axis); } - else if (drawflags == MAN_ROT_T) { /* trackball need special case, init is different */ - /* Do not pass op->ptr!!! trackball has no "constraint" properties! - * See [#34621], it's a miracle it did not cause more problems!!! */ - /* However, we need to copy the "release_confirm" property, but only if defined, see T41112. 
*/ - PointerRNA props_ptr; - PropertyRNA *prop; - wmOperatorType *ot = WM_operatortype_find("TRANSFORM_OT_trackball", true); - WM_operator_properties_create_ptr(&props_ptr, ot); - if ((prop = RNA_struct_find_property(op->ptr, "release_confirm")) && RNA_property_is_set(op->ptr, prop)) { - RNA_property_boolean_set(&props_ptr, prop, RNA_property_boolean_get(op->ptr, prop)); - } - WM_operator_name_call_ptr(C, ot, WM_OP_INVOKE_DEFAULT, &props_ptr); - WM_operator_properties_free(&props_ptr); + else if (drawflags == MAN_ROT_T) { + /* pass */ } else if (drawflags & MAN_ROT_C) { switch (drawflags) { @@ -1939,9 +1954,25 @@ int BIF_do_manipulator(bContext *C, const struct wmEvent *event, wmOperator *op) constraint_axis[2] = 1; break; } - RNA_boolean_set_array(op->ptr, "constraint_axis", constraint_axis); - WM_operator_name_call(C, "TRANSFORM_OT_rotate", WM_OP_INVOKE_DEFAULT, op->ptr); + RNA_boolean_set_array(&props_ptr, "constraint_axis", constraint_axis); + } + + /* pass operator properties on to transform operators */ + prop = RNA_struct_find_property(op->ptr, "use_accurate"); + if (RNA_property_is_set(op->ptr, prop)) { + RNA_property_boolean_set(&props_ptr, prop, RNA_property_boolean_get(op->ptr, prop)); + } + prop = RNA_struct_find_property(op->ptr, "release_confirm"); + if (RNA_property_is_set(op->ptr, prop)) { + RNA_property_boolean_set(&props_ptr, prop, RNA_property_boolean_get(op->ptr, prop)); } + prop = RNA_struct_find_property(op->ptr, "constraint_orientation"); + if (RNA_property_is_set(op->ptr, prop)) { + RNA_property_enum_set(&props_ptr, prop, RNA_property_enum_get(op->ptr, prop)); + } + + WM_operator_name_call_ptr(C, ot, WM_OP_INVOKE_DEFAULT, &props_ptr); + WM_operator_properties_free(&props_ptr); } /* after transform, restore drawflags */ drawflags = 0xFFFF; diff --git a/source/blender/editors/transform/transform_ops.c b/source/blender/editors/transform/transform_ops.c index cbe58ddf586..2a97384cf7d 100644 --- a/source/blender/editors/transform/transform_ops.c 
+++ b/source/blender/editors/transform/transform_ops.c @@ -569,6 +569,9 @@ void Transform_Properties(struct wmOperatorType *ot, int flags) // Add confirm method all the time. At the end because it's not really that important and should be hidden only in log, not in keymap edit /*prop =*/ RNA_def_boolean(ot->srna, "release_confirm", 0, "Confirm on Release", "Always confirm operation when releasing button"); //RNA_def_property_flag(prop, PROP_HIDDEN); + + prop = RNA_def_boolean(ot->srna, "use_accurate", 0, "Accurate", "Use accurate transformation"); + RNA_def_property_flag(prop, PROP_HIDDEN); } } diff --git a/source/blender/editors/transform/transform_snap_object.c b/source/blender/editors/transform/transform_snap_object.c index 7c9dc43dbe4..cf16bb8817d 100644 --- a/source/blender/editors/transform/transform_snap_object.c +++ b/source/blender/editors/transform/transform_snap_object.c @@ -87,6 +87,8 @@ typedef struct SnapObjectData { typedef struct SnapObjectData_Mesh { SnapObjectData sd; BVHTreeFromMesh *bvh_trees[3]; + MPoly *mpoly; + bool poly_allocated; } SnapObjectData_Mesh; @@ -1051,7 +1053,6 @@ static int dm_looptri_to_poly_index(DerivedMesh *dm, const MLoopTri *lt) static bool snapDerivedMesh( SnapObjectContext *sctx, SnapData *snapdata, Object *ob, DerivedMesh *dm, float obmat[4][4], const unsigned int ob_index, - bool do_bb, /* read/write args */ float *ray_depth, float *dist_px, /* return args */ @@ -1112,39 +1113,31 @@ static bool snapDerivedMesh( copy_v3_v3(ray_org_local, snapdata->ray_origin); mul_m4_v3(imat, ray_org_local); - if (do_bb) { - BoundBox *bb = BKE_object_boundbox_get(ob); - - if (bb) { - BoundBox bb_temp; - - /* We cannot afford a bounding box with some null dimension, which may happen in some cases... - * Threshold is rather high, but seems to be needed to get good behavior, see T46099. 
*/ - bb = BKE_boundbox_ensure_minimum_dimensions(bb, &bb_temp, 1e-1f); - - /* In vertex and edges you need to get the pixel distance from ray to BoundBox, see T46816. */ - if (ELEM(snapdata->snap_to, SCE_SNAP_MODE_VERTEX, SCE_SNAP_MODE_EDGE)) { - float dist_px_sq = dist_squared_to_projected_aabb_simple( - lpmat, snapdata->win_half, ray_min_dist, snapdata->mval, - ray_org_local, ray_normal_local, bb->vec[0], bb->vec[6]); - if (dist_px_sq > SQUARE(*dist_px)) - { - return retval; - } + /* Test BoundBox */ + BoundBox *bb = BKE_object_boundbox_get(ob); + if (bb) { + /* In vertex and edges you need to get the pixel distance from ray to BoundBox, see: T46099, T46816 */ + if (ELEM(snapdata->snap_to, SCE_SNAP_MODE_VERTEX, SCE_SNAP_MODE_EDGE)) { + float dist_px_sq = dist_squared_to_projected_aabb_simple( + lpmat, snapdata->win_half, ray_min_dist, snapdata->mval, + ray_org_local, ray_normal_local, bb->vec[0], bb->vec[6]); + if (dist_px_sq > SQUARE(*dist_px)) + { + return retval; } - else { - /* was BKE_boundbox_ray_hit_check, see: cf6ca226fa58 */ - if (!isect_ray_aabb_v3_simple( - ray_start_local, ray_normal_local, bb->vec[0], bb->vec[6], NULL, NULL)) - { - return retval; - } + } + else { + /* was BKE_boundbox_ray_hit_check, see: cf6ca226fa58 */ + if (!isect_ray_aabb_v3_simple( + ray_start_local, ray_normal_local, bb->vec[0], bb->vec[6], NULL, NULL)) + { + return retval; } - /* was local_depth, see: T47838 */ - len_diff = dist_aabb_to_plane(bb->vec[0], bb->vec[6], ray_start_local, ray_normal_local); - if (len_diff < 0) len_diff = 0.0f; - need_ray_start_correction_init = false; } + /* was local_depth, see: T47838 */ + len_diff = dist_aabb_to_plane(bb->vec[0], bb->vec[6], ray_start_local, ray_normal_local); + if (len_diff < 0) len_diff = 0.0f; + need_ray_start_correction_init = false; } SnapObjectData_Mesh *sod = NULL; @@ -1182,6 +1175,29 @@ static bool snapDerivedMesh( if (treedata->cached && !bvhcache_has_tree(dm->bvhCache, treedata->tree)) { free_bvhtree_from_mesh(treedata); 
} + else { + if (!treedata->vert_allocated) { + treedata->vert = DM_get_vert_array(dm, &treedata->vert_allocated); + } + if ((tree_index == 1) && !treedata->edge_allocated) { + treedata->edge = DM_get_edge_array(dm, &treedata->vert_allocated); + } + if (tree_index == 2) { + if (!treedata->loop_allocated) { + treedata->loop = DM_get_loop_array(dm, &treedata->loop_allocated); + } + if (!treedata->looptri_allocated) { + if (!sod->poly_allocated) { + sod->mpoly = DM_get_poly_array(dm, &sod->poly_allocated); + } + treedata->looptri = DM_get_looptri_array( + dm, treedata->vert, + sod->mpoly, dm->getNumPolys(dm), + treedata->loop, dm->getNumLoops(dm), + &treedata->looptri_allocated); + } + } + } } } @@ -1295,10 +1311,17 @@ static bool snapDerivedMesh( } /* SCE_SNAP_MODE_VERTEX or SCE_SNAP_MODE_EDGE */ else { + + /* Warning: the depth_max is currently being used only in perspective view. + * It is not correct to limit the maximum depth for elements obtained with nearest + * since this limitation depends on the normal and the size of the occlusion face. + * And more... ray_depth is being confused with Z-depth here... 
(varies only the precision) */ + const float ray_depth_max_global = *ray_depth + snapdata->depth_range[0]; + Nearest2dUserData neasrest2d = { .dist_px_sq = SQUARE(*dist_px), .r_axis_closest = {1.0f, 1.0f, 1.0f}, - .depth_range = {snapdata->depth_range[0], *ray_depth + snapdata->depth_range[0]}, + .depth_range = {snapdata->depth_range[0], ray_depth_max_global}, .userdata = treedata, .get_edge_verts = (Nearest2DGetEdgeVertsCallback)get_dm_edge_verts, .copy_vert_no = (Nearest2DCopyVertNoCallback)copy_dm_vert_no, @@ -1650,7 +1673,6 @@ static bool snapObject( } retval = snapDerivedMesh( sctx, snapdata, ob, dm, obmat, ob_index, - true, ray_depth, dist_px, r_loc, r_no, r_index, r_hit_list); @@ -1858,6 +1880,9 @@ static void snap_object_data_free(void *sod_v) free_bvhtree_from_mesh(sod->bvh_trees[i]); } } + if (sod->poly_allocated) { + MEM_freeN(sod->mpoly); + } break; } case SNAP_EDIT_MESH: diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 8885209ce01..885ff2ff159 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -57,6 +57,8 @@ set(SRC intern/gpu_init_exit.c intern/gpu_material.c intern/gpu_select.c + intern/gpu_select_pick.c + intern/gpu_select_sample_query.c intern/gpu_shader.c intern/gpu_texture.c @@ -97,6 +99,7 @@ set(SRC GPU_texture.h intern/gpu_codegen.h intern/gpu_private.h + intern/gpu_select_private.h ) data_to_c_simple(shaders/gpu_shader_geometry.glsl SRC) diff --git a/source/blender/gpu/GPU_select.h b/source/blender/gpu/GPU_select.h index 6a16b5b7456..cf5b8bf7d8f 100644 --- a/source/blender/gpu/GPU_select.h +++ b/source/blender/gpu/GPU_select.h @@ -30,19 +30,30 @@ #ifndef __GPU_SELECT_H__ #define __GPU_SELECT_H__ -#include "DNA_vec_types.h" /* rcft */ #include "BLI_sys_types.h" +struct rcti; + /* flags for mode of operation */ enum { GPU_SELECT_ALL = 1, + /* gpu_select_query */ GPU_SELECT_NEAREST_FIRST_PASS = 2, GPU_SELECT_NEAREST_SECOND_PASS = 3, + /* gpu_select_pick */ + 
GPU_SELECT_PICK_ALL = 4, + GPU_SELECT_PICK_NEAREST = 5, }; -void GPU_select_begin(unsigned int *buffer, unsigned int bufsize, rctf *input, char mode, int oldhits); +void GPU_select_begin(unsigned int *buffer, unsigned int bufsize, const struct rcti *input, char mode, int oldhits); bool GPU_select_load_id(unsigned int id); unsigned int GPU_select_end(void); bool GPU_select_query_check_active(void); +/* cache selection region */ +bool GPU_select_is_cached(void); +void GPU_select_cache_begin(void); +void GPU_select_cache_load_id(void); +void GPU_select_cache_end(void); + #endif diff --git a/source/blender/gpu/intern/gpu_select.c b/source/blender/gpu/intern/gpu_select.c index 58582232cd5..9496ff137dc 100644 --- a/source/blender/gpu/intern/gpu_select.c +++ b/source/blender/gpu/intern/gpu_select.c @@ -29,109 +29,86 @@ * Interface for accessing gpu-related methods for selection. The semantics will be * similar to glRenderMode(GL_SELECT) since the goal is to maintain compatibility. */ +#include <stdlib.h> + #include "GPU_select.h" #include "GPU_extensions.h" #include "GPU_glew.h" - + #include "MEM_guardedalloc.h" #include "DNA_userdef_types.h" #include "BLI_utildefines.h" -/* Ad hoc number of queries to allocate to skip doing many glGenQueries */ -#define ALLOC_QUERIES 200 - -typedef struct GPUQueryState { +#include "gpu_select_private.h" + +/* Internal algorithm used */ +enum { + /** GL_SELECT, legacy OpenGL selection */ + ALGO_GL_LEGACY = 1, + /** glBegin/EndQuery(GL_SAMPLES_PASSED... ), `gpu_select_query.c` + * Only sets 4th component (ID) correctly. */ + ALGO_GL_QUERY = 2, + /** Read depth buffer for every drawing pass and extract depths, `gpu_select_pick.c` + * Only sets 4th component (ID) correctly. 
*/ + ALGO_GL_PICK = 3, +}; + +typedef struct GPUSelectState { /* To ignore selection id calls when not initialized */ bool select_is_active; - /* Tracks whether a query has been issued so that gpu_load_id can end the previous one */ - bool query_issued; - /* array holding the OpenGL query identifiers */ - unsigned int *queries; - /* array holding the id corresponding to each query */ - unsigned int *id; - /* number of queries in *queries and *id */ - unsigned int num_of_queries; - /* index to the next query to start */ - unsigned int active_query; /* flag to cache user preference for occlusion based selection */ bool use_gpu_select; - /* cache on initialization */ - unsigned int *buffer; - /* buffer size (stores number of integers, for actual size multiply by sizeof integer)*/ - unsigned int bufsize; /* mode of operation */ char mode; - unsigned int index; - int oldhits; -} GPUQueryState; + /* internal algorithm for selection */ + char algorithm; + /* allow GPU_select_begin/end without drawing */ + bool use_cache; +} GPUSelectState; -static GPUQueryState g_query_state = {0}; +static GPUSelectState g_select_state = {0}; /** * initialize and provide buffer for results */ -void GPU_select_begin(unsigned int *buffer, unsigned int bufsize, rctf *input, char mode, int oldhits) +void GPU_select_begin(unsigned int *buffer, unsigned int bufsize, const rcti *input, char mode, int oldhits) { - g_query_state.select_is_active = true; - g_query_state.query_issued = false; - g_query_state.active_query = 0; - g_query_state.use_gpu_select = GPU_select_query_check_active(); - g_query_state.num_of_queries = 0; - g_query_state.bufsize = bufsize; - g_query_state.buffer = buffer; - g_query_state.mode = mode; - g_query_state.index = 0; - g_query_state.oldhits = oldhits; + g_select_state.select_is_active = true; + g_select_state.use_gpu_select = GPU_select_query_check_active(); + g_select_state.mode = mode; - if (!g_query_state.use_gpu_select) { - glSelectBuffer(bufsize, (GLuint 
*)buffer); - glRenderMode(GL_SELECT); - glInitNames(); - glPushName(-1); + if (ELEM(g_select_state.mode, GPU_SELECT_PICK_ALL, GPU_SELECT_PICK_NEAREST)) { + g_select_state.algorithm = ALGO_GL_PICK; + } + else if (!g_select_state.use_gpu_select) { + g_select_state.algorithm = ALGO_GL_LEGACY; } else { - float viewport[4]; - - g_query_state.num_of_queries = ALLOC_QUERIES; - - g_query_state.queries = MEM_mallocN(g_query_state.num_of_queries * sizeof(*g_query_state.queries), "gpu selection queries"); - g_query_state.id = MEM_mallocN(g_query_state.num_of_queries * sizeof(*g_query_state.id), "gpu selection ids"); - glGenQueries(g_query_state.num_of_queries, g_query_state.queries); - - glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_VIEWPORT_BIT); - /* disable writing to the framebuffer */ - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - - /* In order to save some fill rate we minimize the viewport using rect. - * We need to get the region of the scissor so that our geometry doesn't - * get rejected before the depth test. 
Should probably cull rect against - * scissor for viewport but this is a rare case I think */ - glGetFloatv(GL_SCISSOR_BOX, viewport); - if (!input || input->xmin == input->xmax) { - glViewport(viewport[0], viewport[1], 24, 24); - } - else { - glViewport(viewport[0], viewport[1], (int)(input->xmax - input->xmin), (int)(input->ymax - input->ymin)); - } + g_select_state.algorithm = ALGO_GL_QUERY; + } - /* occlusion queries operates on fragments that pass tests and since we are interested on all - * objects in the view frustum independently of their order, we need to disable the depth test */ - if (mode == GPU_SELECT_ALL) { - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); + switch (g_select_state.algorithm) { + case ALGO_GL_LEGACY: + { + g_select_state.use_cache = false; + glSelectBuffer(bufsize, (GLuint *)buffer); + glRenderMode(GL_SELECT); + glInitNames(); + glPushName(-1); + break; } - else if (mode == GPU_SELECT_NEAREST_FIRST_PASS) { - glClear(GL_DEPTH_BUFFER_BIT); - glEnable(GL_DEPTH_TEST); - glDepthMask(GL_TRUE); - glDepthFunc(GL_LEQUAL); + case ALGO_GL_QUERY: + { + g_select_state.use_cache = false; + gpu_select_query_begin((unsigned int (*)[4])buffer, bufsize / 4, input, mode, oldhits); + break; } - else if (mode == GPU_SELECT_NEAREST_SECOND_PASS) { - glEnable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); - glDepthFunc(GL_EQUAL); + default: /* ALGO_GL_PICK */ + { + gpu_select_pick_begin((unsigned int (*)[4])buffer, bufsize / 4, input, mode); + break; } } } @@ -146,41 +123,24 @@ void GPU_select_begin(unsigned int *buffer, unsigned int bufsize, rctf *input, c bool GPU_select_load_id(unsigned int id) { /* if no selection mode active, ignore */ - if (!g_query_state.select_is_active) + if (!g_select_state.select_is_active) return true; - if (!g_query_state.use_gpu_select) { - glLoadName(id); - } - else { - if (g_query_state.query_issued) { - glEndQuery(GL_SAMPLES_PASSED); + switch (g_select_state.algorithm) { + case ALGO_GL_LEGACY: + { + glLoadName(id); + return 
true; } - /* if required, allocate extra queries */ - if (g_query_state.active_query == g_query_state.num_of_queries) { - g_query_state.num_of_queries += ALLOC_QUERIES; - g_query_state.queries = MEM_reallocN(g_query_state.queries, g_query_state.num_of_queries * sizeof(*g_query_state.queries)); - g_query_state.id = MEM_reallocN(g_query_state.id, g_query_state.num_of_queries * sizeof(*g_query_state.id)); - glGenQueries(ALLOC_QUERIES, &g_query_state.queries[g_query_state.active_query]); + case ALGO_GL_QUERY: + { + return gpu_select_query_load_id(id); } - - glBeginQuery(GL_SAMPLES_PASSED, g_query_state.queries[g_query_state.active_query]); - g_query_state.id[g_query_state.active_query] = id; - g_query_state.active_query++; - g_query_state.query_issued = true; - - if (g_query_state.mode == GPU_SELECT_NEAREST_SECOND_PASS && g_query_state.index < g_query_state.oldhits) { - if (g_query_state.buffer[g_query_state.index * 4 + 3] == id) { - g_query_state.index++; - return true; - } - else { - return false; - } + default: /* ALGO_GL_PICK */ + { + return gpu_select_pick_load_id(id); } } - - return true; } /** @@ -191,59 +151,27 @@ bool GPU_select_load_id(unsigned int id) unsigned int GPU_select_end(void) { unsigned int hits = 0; - if (!g_query_state.use_gpu_select) { - glPopName(); - hits = glRenderMode(GL_RENDER); - } - else { - int i; - if (g_query_state.query_issued) { - glEndQuery(GL_SAMPLES_PASSED); + switch (g_select_state.algorithm) { + case ALGO_GL_LEGACY: + { + glPopName(); + hits = glRenderMode(GL_RENDER); + break; } - - for (i = 0; i < g_query_state.active_query; i++) { - unsigned int result; - glGetQueryObjectuiv(g_query_state.queries[i], GL_QUERY_RESULT, &result); - if (result > 0) { - if (g_query_state.mode != GPU_SELECT_NEAREST_SECOND_PASS) { - int maxhits = g_query_state.bufsize / 4; - - if (hits < maxhits) { - g_query_state.buffer[hits * 4] = 1; - g_query_state.buffer[hits * 4 + 1] = 0xFFFF; - g_query_state.buffer[hits * 4 + 2] = 0xFFFF; - 
g_query_state.buffer[hits * 4 + 3] = g_query_state.id[i]; - - hits++; - } - else { - hits = -1; - break; - } - } - else { - int j; - /* search in buffer and make selected object first */ - for (j = 0; j < g_query_state.oldhits; j++) { - if (g_query_state.buffer[j * 4 + 3] == g_query_state.id[i]) { - g_query_state.buffer[j * 4 + 1] = 0; - g_query_state.buffer[j * 4 + 2] = 0; - } - } - break; - } - } + case ALGO_GL_QUERY: + { + hits = gpu_select_query_end(); + break; + } + default: /* ALGO_GL_PICK */ + { + hits = gpu_select_pick_end(); + break; } - - glDeleteQueries(g_query_state.num_of_queries, g_query_state.queries); - MEM_freeN(g_query_state.queries); - MEM_freeN(g_query_state.id); - glPopAttrib(); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } - g_query_state.select_is_active = false; + g_select_state.select_is_active = false; return hits; } @@ -260,3 +188,41 @@ bool GPU_select_query_check_active(void) GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE)))); } + +/* ---------------------------------------------------------------------------- + * Caching + * + * Support multiple begin/end's as long as they are within the initial region. + * Currently only used by ALGO_GL_PICK. 
+ */ + +void GPU_select_cache_begin(void) +{ + /* validate on GPU_select_begin, clear if not supported */ + BLI_assert(g_select_state.use_cache == false); + g_select_state.use_cache = true; + if (g_select_state.algorithm == ALGO_GL_PICK) { + gpu_select_pick_cache_begin(); + } +} + +void GPU_select_cache_load_id(void) +{ + BLI_assert(g_select_state.use_cache == true); + if (g_select_state.algorithm == ALGO_GL_PICK) { + gpu_select_pick_cache_load_id(); + } +} + +void GPU_select_cache_end(void) +{ + if (g_select_state.algorithm == ALGO_GL_PICK) { + gpu_select_pick_cache_end(); + } + g_select_state.use_cache = false; +} + +bool GPU_select_is_cached(void) +{ + return g_select_state.use_cache && gpu_select_pick_is_cached(); +} diff --git a/source/blender/gpu/intern/gpu_select_pick.c b/source/blender/gpu/intern/gpu_select_pick.c new file mode 100644 index 00000000000..31f82fd002d --- /dev/null +++ b/source/blender/gpu/intern/gpu_select_pick.c @@ -0,0 +1,718 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2017 Blender Foundation. + * All rights reserved. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_select_pick.c + * \ingroup gpu + * + * Custom select code for picking small regions (not efficient for large regions). + * `gpu_select_pick_*` API. + */ +#include <string.h> +#include <stdlib.h> +#include <float.h> + +#include "GPU_select.h" +#include "GPU_extensions.h" +#include "GPU_glew.h" + +#include "MEM_guardedalloc.h" + +#include "BLI_rect.h" +#include "BLI_listbase.h" +#include "BLI_math_vector.h" +#include "BLI_utildefines.h" + +#include "gpu_select_private.h" + +/* #define DEBUG_PRINT */ + +/* Alloc number for depths */ +#define ALLOC_DEPTHS 200 + +/* Z-depth of cleared depth buffer */ +#define DEPTH_MAX 0xffffffff + +/* ---------------------------------------------------------------------------- + * SubRectStride + */ + +/* For looping over a sub-region of a rect, could be moved into 'rct.c'*/ +typedef struct SubRectStride { + unsigned int start; /* start here */ + unsigned int span; /* read these */ + unsigned int span_len; /* len times (read span 'len' times). */ + unsigned int skip; /* skip those */ +} SubRectStride; + +/* we may want to change back to float if uint isn't well supported */ +typedef unsigned int depth_t; + +/** + * Calculate values needed for looping over a sub-region (smaller buffer within a larger buffer). + * + * 'src' must be bigger than 'dst'. 
+ */ +static void rect_subregion_stride_calc(const rcti *src, const rcti *dst, SubRectStride *r_sub) +{ + const int src_x = BLI_rcti_size_x(src); + // const int src_y = BLI_rcti_size_y(src); + const int dst_x = BLI_rcti_size_x(dst); + const int dst_y = BLI_rcti_size_y(dst); + const int x = dst->xmin - src->xmin; + const int y = dst->ymin - src->ymin; + + BLI_assert(src->xmin <= dst->xmin && src->ymin <= dst->ymin && + src->ymax >= dst->ymax && src->ymax >= dst->ymax); + BLI_assert(x >= 0 && y >= 0); + + r_sub->start = (src_x * y) + x; + r_sub->span = dst_x; + r_sub->span_len = dst_y; + r_sub->skip = src_x - dst_x; +} + +/* ---------------------------------------------------------------------------- + * DepthBufCache + * + * Result of reading glReadPixels, + * use for both cache and non-cached storage. + */ + +/* store result of glReadPixels */ +typedef struct DepthBufCache { + struct DepthBufCache *next, *prev; + unsigned int id; + depth_t buf[0]; +} DepthBufCache; + +static DepthBufCache *depth_buf_malloc(unsigned int rect_len) +{ + DepthBufCache *rect = MEM_mallocN(sizeof(DepthBufCache) + sizeof(depth_t) * rect_len, __func__); + rect->id = SELECT_ID_NONE; + return rect; +} + +static bool depth_buf_rect_depth_any( + const DepthBufCache *rect_depth, + unsigned int rect_len) +{ + const depth_t *curr = rect_depth->buf; + for (unsigned int i = 0; i < rect_len; i++, curr++) { + if (*curr != DEPTH_MAX) { + return true; + } + } + return false; +} + +static bool depth_buf_subrect_depth_any( + const DepthBufCache *rect_depth, + const SubRectStride *sub_rect) +{ + const depth_t *curr = rect_depth->buf + sub_rect->start; + for (unsigned int i = 0; i < sub_rect->span_len; i++) { + const depth_t *curr_end = curr + sub_rect->span; + for (; curr < curr_end; curr++, curr++) { + if (*curr != DEPTH_MAX) { + return true; + } + } + curr += sub_rect->skip; + } + return false; +} + +static bool depth_buf_rect_not_equal( + const DepthBufCache *rect_depth_a, const DepthBufCache 
*rect_depth_b, + unsigned int rect_len) +{ + return memcmp(rect_depth_a->buf, rect_depth_b->buf, rect_len * sizeof(depth_t)) != 0; +} + +/** + * Both buffers are the same size, just check if the sub-rect contains any differences. + */ +static bool depth_buf_subrect_not_equal( + const DepthBufCache *rect_src, const DepthBufCache *rect_dst, + const SubRectStride *sub_rect) +{ + /* same as above but different rect sizes */ + const depth_t *prev = rect_src->buf + sub_rect->start; + const depth_t *curr = rect_dst->buf + sub_rect->start; + for (unsigned int i = 0; i < sub_rect->span_len; i++) { + const depth_t *curr_end = curr + sub_rect->span; + for (; curr < curr_end; prev++, curr++) { + if (*prev != *curr) { + return true; + } + } + prev += sub_rect->skip; + curr += sub_rect->skip; + } + return false; +} + +/* ---------------------------------------------------------------------------- + * DepthID + * + * Internal structure for storing hits. + */ + +typedef struct DepthID { + unsigned int id; + depth_t depth; +} DepthID; + +static int depth_id_cmp(const void *v1, const void *v2) +{ + const DepthID *d1 = v1, *d2 = v2; + if (d1->id < d2->id) { + return -1; + } + else if (d1->id > d2->id) { + return 1; + } + else { + return 0; + } +} + +static int depth_cmp(const void *v1, const void *v2) +{ + const DepthID *d1 = v1, *d2 = v2; + if (d1->depth < d2->depth) { + return -1; + } + else if (d1->depth > d2->depth) { + return 1; + } + else { + return 0; + } +} + +/* depth sorting */ +typedef struct GPUPickState { + /* cache on initialization */ + unsigned int (*buffer)[4]; + + /* buffer size (stores number of integers, for actual size multiply by sizeof integer)*/ + unsigned int bufsize; + /* mode of operation */ + char mode; + + /* OpenGL drawing, never use when (is_cached == true). 
*/ + struct { + /* The current depth, accumulated as we draw */ + DepthBufCache *rect_depth; + /* Scratch buffer, avoid allocs every time (when not caching) */ + DepthBufCache *rect_depth_test; + + /* Pass to glReadPixels (x, y, w, h) */ + int clip_readpixels[4]; + + /* Set after first draw */ + bool is_init; + unsigned int prev_id; + } gl; + + /* src: data stored in 'cache' and 'gl', + * dst: use when cached region is smaller (where src -> dst isn't 1:1) */ + struct { + rcti clip_rect; + unsigned int rect_len; + } src, dst; + + /* Store cache between `GPU_select_cache_begin/end` */ + bool use_cache; + bool is_cached; + struct { + /* Cleanup used for iterating over both source and destination buffers: + * src.clip_rect -> dst.clip_rect */ + SubRectStride sub_rect; + + /* List of DepthBufCache, sized of 'src.clip_rect' */ + ListBase bufs; + } cache; + + /* Pickign methods */ + union { + /* GPU_SELECT_PICK_ALL */ + struct { + DepthID *hits; + unsigned int hits_len; + unsigned int hits_len_alloc; + } all; + + /* GPU_SELECT_PICK_NEAREST */ + struct { + unsigned int *rect_id; + } nearest; + }; +} GPUPickState; + + +static GPUPickState g_pick_state = {0}; + +void gpu_select_pick_begin( + unsigned int (*buffer)[4], unsigned int bufsize, + const rcti *input, char mode) +{ + GPUPickState *ps = &g_pick_state; + +#ifdef DEBUG_PRINT + printf("%s: mode=%d, use_cache=%d, is_cache=%d\n", __func__, mode, ps->use_cache, ps->is_cached); +#endif + + ps->bufsize = bufsize; + ps->buffer = buffer; + ps->mode = mode; + + const unsigned int rect_len = BLI_rcti_size_x(input) * BLI_rcti_size_y(input); + ps->dst.clip_rect = *input; + ps->dst.rect_len = rect_len; + + /* Restrict OpenGL operations for when we don't have cache */ + if (ps->is_cached == false) { + + glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_VIEWPORT_BIT); + /* disable writing to the framebuffer */ + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + + if (mode == 
GPU_SELECT_PICK_ALL) { + glDepthFunc(GL_ALWAYS); + } + else { + glDepthFunc(GL_LEQUAL); + } + + glPixelTransferi(GL_DEPTH_BIAS, 0.0); + glPixelTransferi(GL_DEPTH_SCALE, 1.0); + + + float viewport[4]; + glGetFloatv(GL_SCISSOR_BOX, viewport); + + ps->src.clip_rect = *input; + ps->src.rect_len = rect_len; + + ps->gl.clip_readpixels[0] = viewport[0]; + ps->gl.clip_readpixels[1] = viewport[1]; + ps->gl.clip_readpixels[2] = BLI_rcti_size_x(&ps->src.clip_rect); + ps->gl.clip_readpixels[3] = BLI_rcti_size_y(&ps->src.clip_rect); + + glViewport(UNPACK4(ps->gl.clip_readpixels)); + + /* It's possible we don't want to clear depth buffer, + * so existing elements are masked by current z-buffer. */ + glClear(GL_DEPTH_BUFFER_BIT); + + /* scratch buffer (read new values here) */ + ps->gl.rect_depth_test = depth_buf_malloc(rect_len); + ps->gl.rect_depth = depth_buf_malloc(rect_len); + + /* set initial 'far' value */ +#if 0 + glReadPixels(UNPACK4(ps->gl.clip_readpixels), GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, ps->gl.rect_depth->buf); +#else + for (unsigned int i = 0; i < rect_len; i++) { + ps->gl.rect_depth->buf[i] = DEPTH_MAX; + } +#endif + + ps->gl.is_init = false; + ps->gl.prev_id = 0; + } + else { + /* Using cache (ps->is_cached == true) */ + /* src.clip_rect -> dst.clip_rect */ + rect_subregion_stride_calc(&ps->src.clip_rect, &ps->dst.clip_rect, &ps->cache.sub_rect); + BLI_assert(ps->gl.rect_depth == NULL); + BLI_assert(ps->gl.rect_depth_test == NULL); + } + + if (mode == GPU_SELECT_PICK_ALL) { + ps->all.hits = MEM_mallocN(sizeof(*ps->all.hits) * ALLOC_DEPTHS, __func__); + ps->all.hits_len = 0; + ps->all.hits_len_alloc = ALLOC_DEPTHS; + } + else { + /* Set to 0xff for SELECT_ID_NONE */ + ps->nearest.rect_id = MEM_mallocN(sizeof(unsigned int) * ps->dst.rect_len, __func__); + memset(ps->nearest.rect_id, 0xff, sizeof(unsigned int) * ps->dst.rect_len); + } +} + +/** + * Given 2x depths, we know are different - update the depth information + * use for both cached/uncached depth 
buffers. + */ +static void gpu_select_load_id_pass_all(const DepthBufCache *rect_curr) +{ + GPUPickState *ps = &g_pick_state; + const unsigned int id = rect_curr->id; + /* find the best depth for this pass and store in 'all.hits' */ + depth_t depth_best = DEPTH_MAX; + +#define EVAL_TEST() \ + if (depth_best > *curr) { \ + depth_best = *curr; \ + } ((void)0) + + if (ps->is_cached == false) { + const depth_t *curr = rect_curr->buf; + BLI_assert(ps->src.rect_len == ps->dst.rect_len); + const unsigned int rect_len = ps->src.rect_len; + for (unsigned int i = 0; i < rect_len; i++, curr++) { + EVAL_TEST(); + } + } + else { + /* same as above but different rect sizes */ + const depth_t *curr = rect_curr->buf + ps->cache.sub_rect.start; + for (unsigned int i = 0; i < ps->cache.sub_rect.span_len; i++) { + const depth_t *curr_end = curr + ps->cache.sub_rect.span; + for (; curr < curr_end; curr++) { + EVAL_TEST(); + } + curr += ps->cache.sub_rect.skip; + } + } + +#undef EVAL_TEST + + /* ensure enough space */ + if (UNLIKELY(ps->all.hits_len == ps->all.hits_len_alloc)) { + ps->all.hits_len_alloc += ALLOC_DEPTHS; + ps->all.hits = MEM_reallocN(ps->all.hits, ps->all.hits_len_alloc * sizeof(*ps->all.hits)); + } + DepthID *d = &ps->all.hits[ps->all.hits_len++]; + d->id = id; + d->depth = depth_best; +} + +static void gpu_select_load_id_pass_nearest(const DepthBufCache *rect_prev, const DepthBufCache *rect_curr) +{ + GPUPickState *ps = &g_pick_state; + const unsigned int id = rect_curr->id; + /* keep track each pixels ID in 'nearest.rect_id' */ + if (id != SELECT_ID_NONE) { + unsigned int *id_ptr = ps->nearest.rect_id; + +#define EVAL_TEST() \ + if (*curr != *prev) { \ + *id_ptr = id; \ + } ((void)0) + + if (ps->is_cached == false) { + const depth_t *prev = rect_prev->buf; + const depth_t *curr = rect_curr->buf; + BLI_assert(ps->src.rect_len == ps->dst.rect_len); + const unsigned int rect_len = ps->src.rect_len; + for (unsigned int i = 0; i < rect_len; i++, curr++, prev++, id_ptr++) 
{ + EVAL_TEST(); + } + } + else { + /* same as above but different rect sizes */ + const depth_t *prev = rect_prev->buf + ps->cache.sub_rect.start; + const depth_t *curr = rect_curr->buf + ps->cache.sub_rect.start; + for (unsigned int i = 0; i < ps->cache.sub_rect.span_len; i++) { + const depth_t *curr_end = curr + ps->cache.sub_rect.span; + for (; curr < curr_end; prev++, curr++, id_ptr++) { + EVAL_TEST(); + } + prev += ps->cache.sub_rect.skip; + curr += ps->cache.sub_rect.skip; + } + } + +#undef EVAL_TEST + } +} + + +bool gpu_select_pick_load_id(unsigned int id) +{ + GPUPickState *ps = &g_pick_state; + if (ps->gl.is_init) { + const unsigned int rect_len = ps->src.rect_len; + glReadPixels(UNPACK4(ps->gl.clip_readpixels), GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, ps->gl.rect_depth_test->buf); + /* perform initial check since most cases the array remains unchanged */ + + bool do_pass = false; + if (g_pick_state.mode == GPU_SELECT_PICK_ALL) { + if (depth_buf_rect_depth_any(ps->gl.rect_depth_test, rect_len)) { + ps->gl.rect_depth_test->id = ps->gl.prev_id; + gpu_select_load_id_pass_all(ps->gl.rect_depth_test); + do_pass = true; + } + } + else { + if (depth_buf_rect_not_equal(ps->gl.rect_depth, ps->gl.rect_depth_test, rect_len)) { + ps->gl.rect_depth_test->id = ps->gl.prev_id; + gpu_select_load_id_pass_nearest(ps->gl.rect_depth, ps->gl.rect_depth_test); + do_pass = true; + } + } + + if (do_pass) { + /* Store depth in cache */ + if (ps->use_cache) { + BLI_addtail(&ps->cache.bufs, ps->gl.rect_depth); + ps->gl.rect_depth = depth_buf_malloc(ps->src.rect_len); + } + + SWAP(DepthBufCache *, ps->gl.rect_depth, ps->gl.rect_depth_test); + + if (g_pick_state.mode == GPU_SELECT_PICK_ALL) { + /* we want new depths every time */ + glClear(GL_DEPTH_BUFFER_BIT); + } + } + } + + ps->gl.is_init = true; + ps->gl.prev_id = id; + + return true; +} + +unsigned int gpu_select_pick_end(void) +{ + GPUPickState *ps = &g_pick_state; + +#ifdef DEBUG_PRINT + printf("%s\n", __func__); +#endif + + if 
(ps->is_cached == false) { + if (ps->gl.is_init) { + /* force finishing last pass */ + gpu_select_pick_load_id(ps->gl.prev_id); + } + + glPopAttrib(); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + } + + /* assign but never free directly since it may be in cache */ + DepthBufCache *rect_depth_final; + + /* Store depth in cache */ + if (ps->use_cache && !ps->is_cached) { + BLI_addtail(&ps->cache.bufs, ps->gl.rect_depth); + ps->gl.rect_depth = NULL; + rect_depth_final = ps->cache.bufs.last; + } + else if (ps->is_cached) { + rect_depth_final = ps->cache.bufs.last; + } + else { + /* common case, no cache */ + rect_depth_final = ps->gl.rect_depth; + } + + unsigned int maxhits = g_pick_state.bufsize; + DepthID *depth_data; + unsigned int depth_data_len = 0; + + if (g_pick_state.mode == GPU_SELECT_PICK_ALL) { + depth_data = ps->all.hits; + depth_data_len = ps->all.hits_len; + /* move ownership */ + ps->all.hits = NULL; + ps->all.hits_len = 0; + ps->all.hits_len_alloc = 0; + } + else { + /* GPU_SELECT_PICK_NEAREST */ + + /* Over alloc (unlikely we have as many depths as pixels) */ + unsigned int depth_data_len_first_pass = 0; + depth_data = MEM_mallocN(ps->dst.rect_len * sizeof(*depth_data), __func__); + + /* Partially de-duplicating copy, + * when contiguous ID's are found - update their closest depth. + * This isn't essential but means there is less data to sort. 
*/ + +#define EVAL_TEST(i_src, i_dst) \ + { \ + const unsigned int id = ps->nearest.rect_id[i_dst]; \ + if (id != SELECT_ID_NONE) { \ + const depth_t depth = rect_depth_final->buf[i_src]; \ + if (depth_last == NULL || depth_last->id != id) { \ + DepthID *d = &depth_data[depth_data_len_first_pass++]; \ + d->id = id; \ + d->depth = depth; \ + } \ + else if (depth_last->depth > depth) { \ + depth_last->depth = depth; \ + } \ + } \ + } ((void)0) + + { + DepthID *depth_last = NULL; + if (ps->is_cached == false) { + for (unsigned int i = 0; i < ps->src.rect_len; i++) { + EVAL_TEST(i, i); + } + } + else { + /* same as above but different rect sizes */ + unsigned int i_src = ps->cache.sub_rect.start, i_dst = 0; + for (unsigned int j = 0; j < ps->cache.sub_rect.span_len; j++) { + const unsigned int i_src_end = i_src + ps->cache.sub_rect.span; + for (; i_src < i_src_end; i_src++, i_dst++) { + EVAL_TEST(i_src, i_dst); + } + i_src += ps->cache.sub_rect.skip; + } + } + } + +#undef EVAL_TEST + + qsort(depth_data, depth_data_len_first_pass, sizeof(DepthID), depth_id_cmp); + + /* Sort by ID's then keep the best depth for each ID */ + depth_data_len = 0; + { + DepthID *depth_last = NULL; + for (unsigned int i = 0; i < depth_data_len_first_pass; i++) { + if (depth_last == NULL || depth_last->id != depth_data[i].id) { + depth_last = &depth_data[depth_data_len++]; + *depth_last = depth_data[i]; + } + else if (depth_last->depth > depth_data[i].depth) { + depth_last->depth = depth_data[i].depth; + } + } + } + } + + /* Finally sort each unique (id, depth) pair by depth + * so the final hit-list is sorted by depth (nearest first) */ + unsigned int hits = 0; + + if (depth_data_len > maxhits) { + hits = -1; + } + else { + qsort(depth_data, depth_data_len, sizeof(DepthID), depth_cmp); + + for (unsigned int i = 0; i < depth_data_len; i++) { +#ifdef DEBUG_PRINT + printf(" hit: %d: depth %u\n", depth_data[i].id, depth_data[i].depth); +#endif + /* first 3 are dummy values */ + 
g_pick_state.buffer[hits][0] = 1; + g_pick_state.buffer[hits][1] = 0x0; + g_pick_state.buffer[hits][2] = 0x0; + g_pick_state.buffer[hits][3] = depth_data[i].id; + hits++; + } + BLI_assert(hits < maxhits); + } + + MEM_freeN(depth_data); + + MEM_SAFE_FREE(ps->gl.rect_depth); + MEM_SAFE_FREE(ps->gl.rect_depth_test); + + if (g_pick_state.mode == GPU_SELECT_PICK_ALL) { + /* 'hits' already freed as 'depth_data' */ + } + else { + MEM_freeN(ps->nearest.rect_id); + ps->nearest.rect_id = NULL; + } + + if (ps->use_cache) { + ps->is_cached = true; + } + + return hits; +} + +/* ---------------------------------------------------------------------------- + * Caching + * + * Support multiple begin/end's reusing depth buffers. + */ + +void gpu_select_pick_cache_begin(void) +{ + BLI_assert(g_pick_state.use_cache == false); +#ifdef DEBUG_PRINT + printf("%s\n", __func__); +#endif + g_pick_state.use_cache = true; + g_pick_state.is_cached = false; +} + +void gpu_select_pick_cache_end(void) +{ +#ifdef DEBUG_PRINT + printf("%s: with %d buffers\n", __func__, BLI_listbase_count(&g_pick_state.cache.bufs)); +#endif + g_pick_state.use_cache = false; + g_pick_state.is_cached = false; + + BLI_freelistN(&g_pick_state.cache.bufs); +} + +/* is drawing needed? */ +bool gpu_select_pick_is_cached(void) +{ + return g_pick_state.is_cached; +} + +void gpu_select_pick_cache_load_id(void) +{ + BLI_assert(g_pick_state.is_cached == true); + GPUPickState *ps = &g_pick_state; +#ifdef DEBUG_PRINT + printf("%s (building depth from cache)\n", __func__); +#endif + for (DepthBufCache *rect_depth = ps->cache.bufs.first; rect_depth; rect_depth = rect_depth->next) { + if (rect_depth->next != NULL) { + /* we know the buffers differ, but this sub-region may not. 
+ * double check before adding an id-pass */ + if (g_pick_state.mode == GPU_SELECT_PICK_ALL) { + if (depth_buf_subrect_depth_any(rect_depth->next, &ps->cache.sub_rect)) { + gpu_select_load_id_pass_all(rect_depth->next); + } + } + else { + if (depth_buf_subrect_not_equal(rect_depth, rect_depth->next, &ps->cache.sub_rect)) { + gpu_select_load_id_pass_nearest(rect_depth, rect_depth->next); + } + } + } + } +} diff --git a/source/blender/gpu/intern/gpu_select_private.h b/source/blender/gpu/intern/gpu_select_private.h new file mode 100644 index 00000000000..631b8806af9 --- /dev/null +++ b/source/blender/gpu/intern/gpu_select_private.h @@ -0,0 +1,48 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2014 Blender Foundation. + * All rights reserved. + * + * Contributor(s): Antony Riakiotakis. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_select_private.h + * \ingroup gpu + * + * Selection implementations. 
+ */ + +/* gpu_select_pick */ +void gpu_select_pick_begin(unsigned int (*buffer)[4], unsigned int bufsize, const rcti *input, char mode); +bool gpu_select_pick_load_id(unsigned int id); +unsigned int gpu_select_pick_end(void); + +void gpu_select_pick_cache_begin(void); +void gpu_select_pick_cache_end(void); +bool gpu_select_pick_is_cached(void); +void gpu_select_pick_cache_load_id(void); + +/* gpu_select_sample_query */ +void gpu_select_query_begin(unsigned int (*buffer)[4], unsigned int bufsize, const rcti *input, char mode, int oldhits); +bool gpu_select_query_load_id(unsigned int id); +unsigned int gpu_select_query_end(void); + + +#define SELECT_ID_NONE ((unsigned int)0xffffffff) diff --git a/source/blender/gpu/intern/gpu_select_sample_query.c b/source/blender/gpu/intern/gpu_select_sample_query.c new file mode 100644 index 00000000000..5576367edd9 --- /dev/null +++ b/source/blender/gpu/intern/gpu_select_sample_query.c @@ -0,0 +1,209 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2014 Blender Foundation. + * All rights reserved. + * + * Contributor(s): Antony Riakiotakis. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_select.c + * \ingroup gpu + * + * Interface for accessing gpu-related methods for selection. The semantics will be + * similar to glRenderMode(GL_SELECT) since the goal is to maintain compatibility. + */ + +#include <stdlib.h> + +#include "GPU_select.h" +#include "GPU_extensions.h" +#include "GPU_glew.h" + +#include "MEM_guardedalloc.h" + +#include "BLI_rect.h" + +#include "BLI_utildefines.h" + +#include "gpu_select_private.h" + + +/* Ad hoc number of queries to allocate to skip doing many glGenQueries */ +#define ALLOC_QUERIES 200 + +typedef struct GPUQueryState { + /* Tracks whether a query has been issued so that gpu_load_id can end the previous one */ + bool query_issued; + /* array holding the OpenGL query identifiers */ + unsigned int *queries; + /* array holding the id corresponding to each query */ + unsigned int *id; + /* number of queries in *queries and *id */ + unsigned int num_of_queries; + /* index to the next query to start */ + unsigned int active_query; + /* cache on initialization */ + unsigned int (*buffer)[4]; + /* buffer size (stores number of integers, for actual size multiply by sizeof integer)*/ + unsigned int bufsize; + /* mode of operation */ + char mode; + unsigned int index; + int oldhits; +} GPUQueryState; + +static GPUQueryState g_query_state = {0}; + + +void gpu_select_query_begin( + unsigned int (*buffer)[4], unsigned int bufsize, + const rcti *input, char mode, + int oldhits) +{ + float viewport[4]; + + g_query_state.query_issued = false; + g_query_state.active_query = 0; + g_query_state.num_of_queries = 0; + g_query_state.bufsize = bufsize; + g_query_state.buffer = buffer; + g_query_state.mode = mode; + g_query_state.index = 0; + g_query_state.oldhits = oldhits; + + g_query_state.num_of_queries = ALLOC_QUERIES; + + g_query_state.queries = MEM_mallocN(g_query_state.num_of_queries * sizeof(*g_query_state.queries), "gpu selection queries"); + 
g_query_state.id = MEM_mallocN(g_query_state.num_of_queries * sizeof(*g_query_state.id), "gpu selection ids"); + glGenQueries(g_query_state.num_of_queries, g_query_state.queries); + + glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_VIEWPORT_BIT); + /* disable writing to the framebuffer */ + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + /* In order to save some fill rate we minimize the viewport using rect. + * We need to get the region of the scissor so that our geometry doesn't + * get rejected before the depth test. Should probably cull rect against + * scissor for viewport but this is a rare case I think */ + glGetFloatv(GL_SCISSOR_BOX, viewport); + glViewport(viewport[0], viewport[1], BLI_rcti_size_x(input), BLI_rcti_size_y(input)); + + /* occlusion queries operates on fragments that pass tests and since we are interested on all + * objects in the view frustum independently of their order, we need to disable the depth test */ + if (mode == GPU_SELECT_ALL) { + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + } + else if (mode == GPU_SELECT_NEAREST_FIRST_PASS) { + glClear(GL_DEPTH_BUFFER_BIT); + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + glDepthFunc(GL_LEQUAL); + } + else if (mode == GPU_SELECT_NEAREST_SECOND_PASS) { + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + glDepthFunc(GL_EQUAL); + } +} + +bool gpu_select_query_load_id(unsigned int id) +{ + if (g_query_state.query_issued) { + glEndQuery(GL_SAMPLES_PASSED); + } + /* if required, allocate extra queries */ + if (g_query_state.active_query == g_query_state.num_of_queries) { + g_query_state.num_of_queries += ALLOC_QUERIES; + g_query_state.queries = MEM_reallocN(g_query_state.queries, g_query_state.num_of_queries * sizeof(*g_query_state.queries)); + g_query_state.id = MEM_reallocN(g_query_state.id, g_query_state.num_of_queries * sizeof(*g_query_state.id)); + glGenQueries(ALLOC_QUERIES, &g_query_state.queries[g_query_state.active_query]); + } + + glBeginQuery(GL_SAMPLES_PASSED, 
g_query_state.queries[g_query_state.active_query]); + g_query_state.id[g_query_state.active_query] = id; + g_query_state.active_query++; + g_query_state.query_issued = true; + + if (g_query_state.mode == GPU_SELECT_NEAREST_SECOND_PASS && g_query_state.index < g_query_state.oldhits) { + if (g_query_state.buffer[g_query_state.index][3] == id) { + g_query_state.index++; + return true; + } + else { + return false; + } + } + + return true; +} + +unsigned int gpu_select_query_end(void) +{ + int i; + + unsigned int hits = 0; + const unsigned int maxhits = g_query_state.bufsize; + + if (g_query_state.query_issued) { + glEndQuery(GL_SAMPLES_PASSED); + } + + for (i = 0; i < g_query_state.active_query; i++) { + unsigned int result; + glGetQueryObjectuiv(g_query_state.queries[i], GL_QUERY_RESULT, &result); + if (result > 0) { + if (g_query_state.mode != GPU_SELECT_NEAREST_SECOND_PASS) { + + if (hits < maxhits) { + g_query_state.buffer[hits][0] = 1; + g_query_state.buffer[hits][1] = 0xFFFF; + g_query_state.buffer[hits][2] = 0xFFFF; + g_query_state.buffer[hits][3] = g_query_state.id[i]; + + hits++; + } + else { + hits = -1; + break; + } + } + else { + int j; + /* search in buffer and make selected object first */ + for (j = 0; j < g_query_state.oldhits; j++) { + if (g_query_state.buffer[j][3] == g_query_state.id[i]) { + g_query_state.buffer[j][1] = 0; + g_query_state.buffer[j][2] = 0; + } + } + break; + } + } + } + + glDeleteQueries(g_query_state.num_of_queries, g_query_state.queries); + MEM_freeN(g_query_state.queries); + MEM_freeN(g_query_state.id); + glPopAttrib(); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + return hits; +} diff --git a/source/blender/imbuf/intern/imbuf.h b/source/blender/imbuf/intern/imbuf.h index 897a149a45c..90dad70fa61 100644 --- a/source/blender/imbuf/intern/imbuf.h +++ b/source/blender/imbuf/intern/imbuf.h @@ -67,8 +67,6 @@ # define BIG_LONG SWAP_LONG #endif -typedef unsigned char uchar; - #define IMB_DPI_DEFAULT 72.0f #endif /* __IMBUF_H__ 
*/ diff --git a/source/blender/makesdna/DNA_modifier_types.h b/source/blender/makesdna/DNA_modifier_types.h index f95533a88f9..32b43c7ea55 100644 --- a/source/blender/makesdna/DNA_modifier_types.h +++ b/source/blender/makesdna/DNA_modifier_types.h @@ -86,6 +86,7 @@ typedef enum ModifierType { eModifierType_NormalEdit = 50, eModifierType_CorrectiveSmooth = 51, eModifierType_MeshSequenceCache = 52, + eModifierType_SurfaceDeform = 53, NUM_MODIFIER_TYPES } ModifierType; @@ -1570,6 +1571,46 @@ enum { MOD_MESHSEQ_READ_COLOR = (1 << 3), }; +typedef struct SDefBind { + unsigned int *vert_inds; + unsigned int numverts; + int mode; + float *vert_weights; + float normal_dist; + float influence; +} SDefBind; + +typedef struct SDefVert { + SDefBind *binds; + unsigned int numbinds; + char pad[4]; +} SDefVert; + +typedef struct SurfaceDeformModifierData { + ModifierData modifier; + + struct Object *target; /* bind target object */ + SDefVert *verts; /* vertex bind data */ + float falloff; + unsigned int numverts, numpoly; + int flags; + float mat[4][4]; +} SurfaceDeformModifierData; + +/* Surface Deform modifier flags */ +enum { + MOD_SDEF_BIND = (1 << 0), + MOD_SDEF_USES_LOOPTRI = (1 << 1), + MOD_SDEF_HAS_CONCAVE = (1 << 2), +}; + +/* Surface Deform vertex bind modes */ +enum { + MOD_SDEF_MODE_LOOPTRI = 0, + MOD_SDEF_MODE_NGON = 1, + MOD_SDEF_MODE_CENTROID = 2, +}; + #define MOD_MESHSEQ_READ_ALL \ (MOD_MESHSEQ_READ_VERT | MOD_MESHSEQ_READ_POLY | MOD_MESHSEQ_READ_UV | MOD_MESHSEQ_READ_COLOR) diff --git a/source/blender/makesdna/DNA_node_types.h b/source/blender/makesdna/DNA_node_types.h index fd601e55550..47677e50451 100644 --- a/source/blender/makesdna/DNA_node_types.h +++ b/source/blender/makesdna/DNA_node_types.h @@ -668,7 +668,8 @@ typedef struct NodeScriptDict { /* qdn: glare node */ typedef struct NodeGlare { char quality, type, iter; - char angle, pad_c1, size, pad[2]; + /* XXX angle is only kept for backward/forward compatibility, was used for two different things, see 
T50736. */ + char angle DNA_DEPRECATED, pad_c1, size, star_45, streaks; float colmod, mix, threshold, fade; float angle_ofs, pad_f1; } NodeGlare; diff --git a/source/blender/makesdna/DNA_object_force.h b/source/blender/makesdna/DNA_object_force.h index 59acefeffe4..ed14c4b9311 100644 --- a/source/blender/makesdna/DNA_object_force.h +++ b/source/blender/makesdna/DNA_object_force.h @@ -372,6 +372,7 @@ typedef struct SoftBody { #define PFIELD_DO_ROTATION (1<<15) #define PFIELD_GUIDE_PATH_WEIGHT (1<<16) /* apply curve weights */ #define PFIELD_SMOKE_DENSITY (1<<17) /* multiply smoke force by density */ +#define PFIELD_GRAVITATION (1<<18) /* used for (simple) force */ /* pd->falloff */ #define PFIELD_FALL_SPHERE 0 diff --git a/source/blender/makesdna/DNA_scene_types.h b/source/blender/makesdna/DNA_scene_types.h index 8ee15ef21a3..918d0f00040 100644 --- a/source/blender/makesdna/DNA_scene_types.h +++ b/source/blender/makesdna/DNA_scene_types.h @@ -1716,6 +1716,7 @@ typedef struct Scene { #define SCER_LOCK_FRAME_SELECTION (1<<1) /* timeline/keyframe jumping - only selected items (on by default) */ #define SCE_KEYS_NO_SELONLY (1<<2) +#define SCER_SHOW_SUBFRAME (1<<3) /* mode (int now) */ #define R_OSA 0x0001 diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index 94cc7dd9892..fc970c40c12 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -498,7 +498,6 @@ typedef struct UserDef { int prefetchframes; float pad_rot_angle; /* control the rotation step of the view when PAD2, PAD4, PAD6&PAD8 is use */ short frameserverport; - short pad4; short obcenter_dia; short rvisize; /* rotating view icon size */ short rvibright; /* rotating view icon brightness */ @@ -510,6 +509,8 @@ typedef struct UserDef { char ipo_new; /* interpolation mode for newly added F-Curves */ char keyhandles_new; /* handle types for newly added keyframes */ char gpu_select_method; + char 
gpu_select_pick_deph; + char pad4; char view_frame_type; int view_frame_keyframes; /* number of keyframes to zoom around current frame */ diff --git a/source/blender/makesrna/RNA_access.h b/source/blender/makesrna/RNA_access.h index 66e6f30feeb..f9aaec69ce7 100644 --- a/source/blender/makesrna/RNA_access.h +++ b/source/blender/makesrna/RNA_access.h @@ -598,6 +598,7 @@ extern StructRNA RNA_StucciTexture; extern StructRNA RNA_SubsurfModifier; extern StructRNA RNA_SunLamp; extern StructRNA RNA_SurfaceCurve; +extern StructRNA RNA_SurfaceDeformModifier; extern StructRNA RNA_SurfaceModifier; extern StructRNA RNA_TexMapping; extern StructRNA RNA_Text; diff --git a/source/blender/makesrna/intern/rna_define.c b/source/blender/makesrna/intern/rna_define.c index dc97d39052b..1d232d2df39 100644 --- a/source/blender/makesrna/intern/rna_define.c +++ b/source/blender/makesrna/intern/rna_define.c @@ -3157,8 +3157,9 @@ int rna_parameter_size(PropertyRNA *parm) StringPropertyRNA *sparm = (StringPropertyRNA *)parm; return sizeof(char) * sparm->maxlength; } - else + else { return sizeof(char *); + } case PROP_POINTER: { #ifdef RNA_RUNTIME diff --git a/source/blender/makesrna/intern/rna_mesh_api.c b/source/blender/makesrna/intern/rna_mesh_api.c index ff9873fb3d1..9b0a25560f9 100644 --- a/source/blender/makesrna/intern/rna_mesh_api.c +++ b/source/blender/makesrna/intern/rna_mesh_api.c @@ -209,6 +209,11 @@ static void rna_Mesh_flip_normals(Mesh *mesh) DAG_id_tag_update(&mesh->id, 0); } +static void rna_Mesh_split_faces(Mesh *mesh, int free_loop_normals) +{ + BKE_mesh_split_faces(mesh, free_loop_normals != 0); +} + #else void RNA_api_mesh(StructRNA *srna) @@ -240,8 +245,10 @@ void RNA_api_mesh(StructRNA *srna) func = RNA_def_function(srna, "free_normals_split", "rna_Mesh_free_normals_split"); RNA_def_function_ui_description(func, "Free split vertex normals"); - func = RNA_def_function(srna, "split_faces", "BKE_mesh_split_faces"); + func = RNA_def_function(srna, "split_faces", 
"rna_Mesh_split_faces"); RNA_def_function_ui_description(func, "Split faces based on the edge angle"); + RNA_def_boolean(func, "free_loop_normals", 1, "Free Loop Notmals", + "Free loop normals custom data layer"); func = RNA_def_function(srna, "calc_tangents", "rna_Mesh_calc_tangents"); RNA_def_function_flag(func, FUNC_USE_REPORTS); diff --git a/source/blender/makesrna/intern/rna_modifier.c b/source/blender/makesrna/intern/rna_modifier.c index c4f0db38a16..47c4b425155 100644 --- a/source/blender/makesrna/intern/rna_modifier.c +++ b/source/blender/makesrna/intern/rna_modifier.c @@ -105,6 +105,7 @@ EnumPropertyItem rna_enum_object_modifier_type_items[] = { {eModifierType_Shrinkwrap, "SHRINKWRAP", ICON_MOD_SHRINKWRAP, "Shrinkwrap", ""}, {eModifierType_SimpleDeform, "SIMPLE_DEFORM", ICON_MOD_SIMPLEDEFORM, "Simple Deform", ""}, {eModifierType_Smooth, "SMOOTH", ICON_MOD_SMOOTH, "Smooth", ""}, + {eModifierType_SurfaceDeform, "SURFACE_DEFORM", ICON_MOD_MESHDEFORM, "Surface Deform", ""}, {eModifierType_Warp, "WARP", ICON_MOD_WARP, "Warp", ""}, {eModifierType_Wave, "WAVE", ICON_MOD_WAVE, "Wave", ""}, {0, "", 0, N_("Simulate"), ""}, @@ -408,6 +409,8 @@ static StructRNA *rna_Modifier_refine(struct PointerRNA *ptr) return &RNA_CorrectiveSmoothModifier; case eModifierType_MeshSequenceCache: return &RNA_MeshSequenceCacheModifier; + case eModifierType_SurfaceDeform: + return &RNA_SurfaceDeformModifier; /* Default */ case eModifierType_None: case eModifierType_ShapeKey: @@ -573,6 +576,7 @@ RNA_MOD_OBJECT_SET(MeshDeform, object, OB_MESH); RNA_MOD_OBJECT_SET(NormalEdit, target, OB_EMPTY); RNA_MOD_OBJECT_SET(Shrinkwrap, target, OB_MESH); RNA_MOD_OBJECT_SET(Shrinkwrap, auxTarget, OB_MESH); +RNA_MOD_OBJECT_SET(SurfaceDeform, target, OB_MESH); static void rna_HookModifier_object_set(PointerRNA *ptr, PointerRNA value) { @@ -1131,6 +1135,11 @@ static int rna_CorrectiveSmoothModifier_is_bind_get(PointerRNA *ptr) return (csmd->bind_coords != NULL); } +static int 
rna_SurfaceDeformModifier_is_bound_get(PointerRNA *ptr) +{ + return (((SurfaceDeformModifierData *)ptr->data)->verts != NULL); +} + static void rna_MeshSequenceCache_object_path_update(Main *bmain, Scene *scene, PointerRNA *ptr) { #ifdef WITH_ALEMBIC @@ -4702,6 +4711,33 @@ static void rna_def_modifier_normaledit(BlenderRNA *brna) RNA_def_property_update(prop, 0, "rna_Modifier_update"); } +static void rna_def_modifier_surfacedeform(BlenderRNA *brna) +{ + StructRNA *srna; + PropertyRNA *prop; + + srna = RNA_def_struct(brna, "SurfaceDeformModifier", "Modifier"); + RNA_def_struct_ui_text(srna, "SurfaceDeform Modifier", "Deform the mesh by binding it to the surface of a target mesh"); + RNA_def_struct_sdna(srna, "SurfaceDeformModifierData"); + RNA_def_struct_ui_icon(srna, ICON_MOD_MESHDEFORM); + + prop = RNA_def_property(srna, "target", PROP_POINTER, PROP_NONE); + RNA_def_property_ui_text(prop, "Target", "Mesh object to deform with"); + RNA_def_property_pointer_funcs(prop, NULL, "rna_SurfaceDeformModifier_target_set", NULL, "rna_Mesh_object_poll"); + RNA_def_property_flag(prop, PROP_EDITABLE | PROP_ID_SELF_CHECK); + RNA_def_property_update(prop, 0, "rna_Modifier_dependency_update"); + + prop = RNA_def_property(srna, "falloff", PROP_FLOAT, PROP_NONE); + RNA_def_property_range(prop, 2.0f, 16.0f); + RNA_def_property_ui_text(prop, "Interpolation falloff", "Controls how much nearby polygons influence deformation"); + RNA_def_property_update(prop, 0, "rna_Modifier_update"); + + prop = RNA_def_property(srna, "is_bound", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_funcs(prop, "rna_SurfaceDeformModifier_is_bound_get", NULL); + RNA_def_property_ui_text(prop, "Bound", "Whether geometry has been bound to target mesh"); + RNA_def_property_clear_flag(prop, PROP_EDITABLE); +} + void RNA_def_modifier(BlenderRNA *brna) { StructRNA *srna; @@ -4819,6 +4855,7 @@ void RNA_def_modifier(BlenderRNA *brna) rna_def_modifier_datatransfer(brna); rna_def_modifier_normaledit(brna); rna_def_modifier_meshseqcache(brna); + 
rna_def_modifier_surfacedeform(brna); } #endif diff --git a/source/blender/makesrna/intern/rna_nodetree.c b/source/blender/makesrna/intern/rna_nodetree.c index b35142f2a58..784004182dd 100644 --- a/source/blender/makesrna/intern/rna_nodetree.c +++ b/source/blender/makesrna/intern/rna_nodetree.c @@ -5721,8 +5721,8 @@ static void def_cmp_glare(StructRNA *srna) RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update"); prop = RNA_def_property(srna, "streaks", PROP_INT, PROP_NONE); - RNA_def_property_int_sdna(prop, NULL, "angle"); - RNA_def_property_range(prop, 2, 16); + RNA_def_property_int_sdna(prop, NULL, "streaks"); + RNA_def_property_range(prop, 1, 16); RNA_def_property_ui_text(prop, "Streaks", "Total number of streaks"); RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update"); @@ -5739,7 +5739,7 @@ static void def_cmp_glare(StructRNA *srna) RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update"); prop = RNA_def_property(srna, "use_rotate_45", PROP_BOOLEAN, PROP_NONE); - RNA_def_property_boolean_sdna(prop, NULL, "angle", 0); + RNA_def_property_boolean_sdna(prop, NULL, "star_45", 0); RNA_def_property_ui_text(prop, "Rotate 45", "Simple star filter: add 45 degree rotation offset"); RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update"); diff --git a/source/blender/makesrna/intern/rna_object.c b/source/blender/makesrna/intern/rna_object.c index 0cffba47f16..b3c166a6810 100644 --- a/source/blender/makesrna/intern/rna_object.c +++ b/source/blender/makesrna/intern/rna_object.c @@ -1321,8 +1321,12 @@ static void rna_Object_active_constraint_set(PointerRNA *ptr, PointerRNA value) static bConstraint *rna_Object_constraints_new(Object *object, int type) { + bConstraint *new_con = BKE_constraint_add_for_object(object, NULL, type); + + ED_object_constraint_tag_update(object, new_con); WM_main_add_notifier(NC_OBJECT | ND_CONSTRAINT | NA_ADDED, object); - return BKE_constraint_add_for_object(object, NULL, type); + + return 
new_con; } static void rna_Object_constraints_remove(Object *object, ReportList *reports, PointerRNA *con_ptr) diff --git a/source/blender/makesrna/intern/rna_object_force.c b/source/blender/makesrna/intern/rna_object_force.c index 1d89f7535c4..514fca1b011 100644 --- a/source/blender/makesrna/intern/rna_object_force.c +++ b/source/blender/makesrna/intern/rna_object_force.c @@ -1275,7 +1275,7 @@ static void rna_def_field(BlenderRNA *brna) prop = RNA_def_property(srna, "falloff_power", PROP_FLOAT, PROP_NONE); RNA_def_property_float_sdna(prop, NULL, "f_power"); RNA_def_property_range(prop, 0.0f, 10.0f); - RNA_def_property_ui_text(prop, "Falloff Power", "Falloff power (real gravitational falloff = 2)"); + RNA_def_property_ui_text(prop, "Falloff Power", ""); RNA_def_property_update(prop, 0, "rna_FieldSettings_update"); prop = RNA_def_property(srna, "distance_min", PROP_FLOAT, PROP_NONE); @@ -1394,6 +1394,11 @@ static void rna_def_field(BlenderRNA *brna) RNA_def_property_boolean_sdna(prop, NULL, "flag", PFIELD_SMOKE_DENSITY); RNA_def_property_ui_text(prop, "Apply Density", "Adjust force strength based on smoke density"); RNA_def_property_update(prop, 0, "rna_FieldSettings_update"); + prop = RNA_def_property(srna, "use_gravity_falloff", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "flag", PFIELD_GRAVITATION); + RNA_def_property_ui_text(prop, "Gravity Falloff", "Multiply force by 1/distance²"); + RNA_def_property_update(prop, 0, "rna_FieldSettings_update"); + /* Pointer */ diff --git a/source/blender/makesrna/intern/rna_pose.c b/source/blender/makesrna/intern/rna_pose.c index 28ce63a61bd..8d161466d56 100644 --- a/source/blender/makesrna/intern/rna_pose.c +++ b/source/blender/makesrna/intern/rna_pose.c @@ -524,12 +524,15 @@ static void rna_PoseChannel_active_constraint_set(PointerRNA *ptr, PointerRNA va BKE_constraints_active_set(&pchan->constraints, (bConstraint *)value.data); } -static bConstraint *rna_PoseChannel_constraints_new(bPoseChannel 
*pchan, int type) +static bConstraint *rna_PoseChannel_constraints_new(ID *id, bPoseChannel *pchan, Main *main, int type) { - /*WM_main_add_notifier(NC_OBJECT|ND_CONSTRAINT|NA_ADDED, object); */ - /* TODO, pass object also */ - /* TODO, new pose bones don't have updated draw flags */ - return BKE_constraint_add_for_pose(NULL, pchan, NULL, type); + Object *ob = (Object *)id; + bConstraint *new_con = BKE_constraint_add_for_pose(ob, pchan, NULL, type); + + ED_object_constraint_dependency_tag_update(main, ob, new_con); + WM_main_add_notifier(NC_OBJECT | ND_CONSTRAINT | NA_ADDED, id); + + return new_con; } static void rna_PoseChannel_constraints_remove(ID *id, bPoseChannel *pchan, ReportList *reports, PointerRNA *con_ptr) @@ -764,6 +767,7 @@ static void rna_def_pose_channel_constraints(BlenderRNA *brna, PropertyRNA *cpro /* Constraint collection */ func = RNA_def_function(srna, "new", "rna_PoseChannel_constraints_new"); RNA_def_function_ui_description(func, "Add a constraint to this object"); + RNA_def_function_flag(func, FUNC_USE_MAIN | FUNC_USE_SELF_ID); /* ID and Main needed for refresh */ /* return type */ parm = RNA_def_pointer(func, "constraint", "Constraint", "", "New constraint"); RNA_def_function_return(func, parm); diff --git a/source/blender/makesrna/intern/rna_scene.c b/source/blender/makesrna/intern/rna_scene.c index 1166fb89a0a..121e4f56a6e 100644 --- a/source/blender/makesrna/intern/rna_scene.c +++ b/source/blender/makesrna/intern/rna_scene.c @@ -411,7 +411,7 @@ EnumPropertyItem rna_enum_gpencil_interpolation_mode_items[] = { /* interpolation */ {0, "", 0, N_("Interpolation"), "Standard transitions between keyframes"}, {GP_IPO_LINEAR, "LINEAR", ICON_IPO_LINEAR, "Linear", "Straight-line interpolation between A and B (i.e. 
no ease in/out)"}, - {GP_IPO_CURVEMAP, "CUSTOM", ICON_IPO_BEZIER, "Custom", "Custom interpolation defined using a curvemap"}, + {GP_IPO_CURVEMAP, "CUSTOM", ICON_IPO_BEZIER, "Custom", "Custom interpolation defined using a curve map"}, /* easing */ {0, "", 0, N_("Easing (by strength)"), "Predefined inertial transitions, useful for motion graphics (from least to most ''dramatic'')"}, @@ -792,6 +792,21 @@ static void rna_Scene_frame_current_set(PointerRNA *ptr, int value) data->r.cfra = value; } +static float rna_Scene_frame_float_get(PointerRNA *ptr) +{ + Scene *data = (Scene *)ptr->data; + return (float)data->r.cfra + data->r.subframe; +} + +static void rna_Scene_frame_float_set(PointerRNA *ptr, float value) +{ + Scene *data = (Scene *)ptr->data; + /* if negative frames aren't allowed, then we can't use them */ + FRAMENUMBER_MIN_CLAMP(value); + data->r.cfra = (int)value; + data->r.subframe = value - data->r.cfra; +} + static float rna_Scene_frame_current_final_get(PointerRNA *ptr) { Scene *scene = (Scene *)ptr->data; @@ -872,6 +887,12 @@ static void rna_Scene_preview_range_end_frame_set(PointerRNA *ptr, int value) data->r.pefra = value; } +static void rna_Scene_show_subframe_update(Main *UNUSED(bmain), Scene *UNUSED(current_scene), PointerRNA *ptr) +{ + Scene *scene = (Scene *)ptr->id.data; + scene->r.subframe = 0.0f; +} + static void rna_Scene_frame_update(Main *bmain, Scene *UNUSED(current_scene), PointerRNA *ptr) { Scene *scene = (Scene *)ptr->id.data; @@ -7081,8 +7102,19 @@ void RNA_def_scene(BlenderRNA *brna) prop = RNA_def_property(srna, "frame_subframe", PROP_FLOAT, PROP_TIME); RNA_def_property_float_sdna(prop, NULL, "r.subframe"); RNA_def_property_ui_text(prop, "Current Sub-Frame", ""); - RNA_def_property_clear_flag(prop, PROP_ANIMATABLE | PROP_EDITABLE); - + RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); + RNA_def_property_range(prop, 0.0f, 1.0f); + RNA_def_property_ui_range(prop, 0.0f, 1.0f, 0.01, 2); + RNA_def_property_update(prop, NC_SCENE | 
ND_FRAME, "rna_Scene_frame_update"); + + prop = RNA_def_property(srna, "frame_float", PROP_FLOAT, PROP_TIME); + RNA_def_property_ui_text(prop, "Current Sub-Frame", ""); + RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); + RNA_def_property_range(prop, MINAFRAME, MAXFRAME); + RNA_def_property_ui_range(prop, MINAFRAME, MAXFRAME, 0.1, 2); + RNA_def_property_float_funcs(prop, "rna_Scene_frame_float_get", "rna_Scene_frame_float_set", NULL); + RNA_def_property_update(prop, NC_SCENE | ND_FRAME, "rna_Scene_frame_update"); + prop = RNA_def_property(srna, "frame_start", PROP_INT, PROP_TIME); RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); RNA_def_property_int_sdna(prop, NULL, "r.sfra"); @@ -7147,7 +7179,15 @@ void RNA_def_scene(BlenderRNA *brna) RNA_def_property_int_funcs(prop, NULL, "rna_Scene_preview_range_end_frame_set", NULL); RNA_def_property_ui_text(prop, "Preview Range End Frame", "Alternative end frame for UI playback"); RNA_def_property_update(prop, NC_SCENE | ND_FRAME, NULL); - + + /* Subframe for moblur debug. 
*/ + prop = RNA_def_property(srna, "show_subframe", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_clear_flag(prop, PROP_ANIMATABLE); + RNA_def_property_boolean_sdna(prop, NULL, "r.flag", SCER_SHOW_SUBFRAME); + RNA_def_property_ui_text(prop, "Show Subframe", + "Show current scene subframe and allow set it using interface tools"); + RNA_def_property_update(prop, NC_SCENE | ND_FRAME, "rna_Scene_show_subframe_update"); + /* Timeline / Time Navigation settings */ prop = RNA_def_property(srna, "show_keys_from_selected_only", PROP_BOOLEAN, PROP_NONE); RNA_def_property_boolean_negative_sdna(prop, NULL, "flag", SCE_KEYS_NO_SELONLY); diff --git a/source/blender/makesrna/intern/rna_smoke.c b/source/blender/makesrna/intern/rna_smoke.c index 6db370fc152..c12937bd2bf 100644 --- a/source/blender/makesrna/intern/rna_smoke.c +++ b/source/blender/makesrna/intern/rna_smoke.c @@ -832,14 +832,14 @@ static void rna_def_smoke_domain_settings(BlenderRNA *brna) {FLUID_FIELD_COLOR_R, "COLOR_R", 0, "Red", "Red component of the color field"}, {FLUID_FIELD_COLOR_G, "COLOR_G", 0, "Green", "Green component of the color field"}, {FLUID_FIELD_COLOR_B, "COLOR_B", 0, "Blue", "Blue component of the color field"}, - {FLUID_FIELD_DENSITY, "DENSITY", 0, "Density", "Quantity of soot in the fluid"}, + {FLUID_FIELD_DENSITY, "DENSITY", 0, "Density", "Quantity of soot in the fluid"}, {FLUID_FIELD_FLAME, "FLAME", 0, "Flame", "Flame field"}, {FLUID_FIELD_FUEL, "FUEL", 0, "Fuel", "Fuel field"}, {FLUID_FIELD_HEAT, "HEAT", 0, "Heat", "Temperature of the fluid"}, {FLUID_FIELD_VELOCITY_X, "VELOCITY_X", 0, "X Velocity", "X component of the velocity field"}, {FLUID_FIELD_VELOCITY_Y, "VELOCITY_Y", 0, "Y Velocity", "Y component of the velocity field"}, {FLUID_FIELD_VELOCITY_Z, "VELOCITY_Z", 0, "Z Velocity", "Z component of the velocity field"}, - {0, NULL, 0, NULL, NULL} + {0, NULL, 0, NULL, NULL} }; prop = RNA_def_property(srna, "coba_field", PROP_ENUM, PROP_NONE); diff --git 
a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c index a75e3c13fce..6927abcb4f8 100644 --- a/source/blender/makesrna/intern/rna_userdef.c +++ b/source/blender/makesrna/intern/rna_userdef.c @@ -4192,6 +4192,10 @@ static void rna_def_userdef_system(BlenderRNA *brna) RNA_def_property_ui_text(prop, "Selection Method", "Use OpenGL occlusion queries or selection render mode to accelerate selection"); + prop = RNA_def_property(srna, "use_select_pick_depth", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "gpu_select_pick_deph", 1); + RNA_def_property_ui_text(prop, "OpenGL Depth Picking", "Use the depth buffer for picking 3D View selection"); + /* Full scene anti-aliasing */ prop = RNA_def_property(srna, "multi_sample", PROP_ENUM, PROP_NONE); RNA_def_property_enum_bitflag_sdna(prop, NULL, "ogl_multisamples"); diff --git a/source/blender/modifiers/CMakeLists.txt b/source/blender/modifiers/CMakeLists.txt index bacfc177432..ad2b862141c 100644 --- a/source/blender/modifiers/CMakeLists.txt +++ b/source/blender/modifiers/CMakeLists.txt @@ -93,6 +93,7 @@ set(SRC intern/MOD_solidify.c intern/MOD_subsurf.c intern/MOD_surface.c + intern/MOD_surfacedeform.c intern/MOD_triangulate.c intern/MOD_util.c intern/MOD_uvwarp.c diff --git a/source/blender/modifiers/MOD_modifiertypes.h b/source/blender/modifiers/MOD_modifiertypes.h index 4c881445893..bf121af2bd1 100644 --- a/source/blender/modifiers/MOD_modifiertypes.h +++ b/source/blender/modifiers/MOD_modifiertypes.h @@ -85,6 +85,7 @@ extern ModifierTypeInfo modifierType_DataTransfer; extern ModifierTypeInfo modifierType_NormalEdit; extern ModifierTypeInfo modifierType_CorrectiveSmooth; extern ModifierTypeInfo modifierType_MeshSequenceCache; +extern ModifierTypeInfo modifierType_SurfaceDeform; /* MOD_util.c */ void modifier_type_init(ModifierTypeInfo *types[]); diff --git a/source/blender/modifiers/intern/MOD_boolean.c b/source/blender/modifiers/intern/MOD_boolean.c index 
f828bc68857..f86d8b99f3c 100644 --- a/source/blender/modifiers/intern/MOD_boolean.c +++ b/source/blender/modifiers/intern/MOD_boolean.c @@ -319,6 +319,7 @@ static DerivedMesh *applyModifier_bmesh( use_separate, use_dissolve, use_island_connect, + false, bmd->operation, bmd->double_threshold); diff --git a/source/blender/modifiers/intern/MOD_displace.c b/source/blender/modifiers/intern/MOD_displace.c index f6acbef96e9..18f60bab490 100644 --- a/source/blender/modifiers/intern/MOD_displace.c +++ b/source/blender/modifiers/intern/MOD_displace.c @@ -325,7 +325,7 @@ static void displaceModifier_do( float (*tex_co)[3]; float weight = 1.0f; /* init value unused but some compilers may complain */ float (*vert_clnors)[3] = NULL; - float local_mat[4][4] = {0}; + float local_mat[4][4] = {{0}}; const bool use_global_direction = dmd->space == MOD_DISP_SPACE_GLOBAL; if (!dmd->texture && dmd->direction == MOD_DISP_DIR_RGB_XYZ) return; diff --git a/source/blender/modifiers/intern/MOD_dynamicpaint.c b/source/blender/modifiers/intern/MOD_dynamicpaint.c index 05068b9b597..bb75d655802 100644 --- a/source/blender/modifiers/intern/MOD_dynamicpaint.c +++ b/source/blender/modifiers/intern/MOD_dynamicpaint.c @@ -116,7 +116,7 @@ static DerivedMesh *applyModifier(ModifierData *md, Object *ob, static bool is_brush_cb(Object *UNUSED(ob), ModifierData *pmd) { - return ((DynamicPaintModifierData*)pmd)->brush != NULL; + return ((DynamicPaintModifierData *)pmd)->brush != NULL; } static void updateDepgraph(ModifierData *md, DagForest *forest, diff --git a/source/blender/modifiers/intern/MOD_surfacedeform.c b/source/blender/modifiers/intern/MOD_surfacedeform.c new file mode 100644 index 00000000000..a999d7629af --- /dev/null +++ b/source/blender/modifiers/intern/MOD_surfacedeform.c @@ -0,0 +1,1226 @@ +#include "DNA_object_types.h" +#include "DNA_scene_types.h" + +#include "BLI_alloca.h" +#include "BLI_math.h" +#include "BLI_math_geom.h" +#include "BLI_task.h" + +#include "BKE_cdderivedmesh.h" 
+#include "BKE_editmesh.h" +#include "BKE_library_query.h" +#include "BKE_modifier.h" + +#include "depsgraph_private.h" + +#include "MEM_guardedalloc.h" + +#include "MOD_util.h" + +typedef struct SDefAdjacency { + struct SDefAdjacency *next; + unsigned int index; +} SDefAdjacency; + +typedef struct SDefAdjacencyArray { + SDefAdjacency *first; + unsigned int num; /* Careful, this is twice the number of polygons (avoids an extra loop) */ +} SDefAdjacencyArray; + +typedef struct SDefEdgePolys { + unsigned int polys[2], num; +} SDefEdgePolys; + +typedef struct SDefBindCalcData { + BVHTreeFromMesh * const treeData; + const SDefAdjacencyArray * const vert_edges; + const SDefEdgePolys * const edge_polys; + SDefVert * const bind_verts; + const MLoopTri * const looptri; + const MPoly * const mpoly; + const MEdge * const medge; + const MLoop * const mloop; + float (* const targetCos)[3]; + float (* const vertexCos)[3]; + float imat[4][4]; + const float falloff; + int success; +} SDefBindCalcData; + +typedef struct SDefBindPoly { + float (*coords)[3]; + float (*coords_v2)[2]; + float point_v2[2]; + float weight_angular; + float weight_dist_proj; + float weight_dist; + float weight; + float scales[2]; + float centroid[3]; + float centroid_v2[2]; + float normal[3]; + float cent_edgemid_vecs_v2[2][2]; + float edgemid_angle; + float point_edgemid_angles[2]; + float corner_edgemid_angles[2]; + float dominant_angle_weight; + unsigned int index; + unsigned int numverts; + unsigned int loopstart; + unsigned int edge_inds[2]; + unsigned int edge_vert_inds[2]; + unsigned int corner_ind; + unsigned int dominant_edge; + bool inside; +} SDefBindPoly; + +typedef struct SDefBindWeightData { + SDefBindPoly *bind_polys; + unsigned int numpoly; + unsigned int numbinds; +} SDefBindWeightData; + +typedef struct SDefDeformData { + const SDefVert * const bind_verts; + float (* const targetCos)[3]; + float (* const vertexCos)[3]; +} SDefDeformData; + +/* Bind result values */ +enum { + 
MOD_SDEF_BIND_RESULT_SUCCESS = 1, + MOD_SDEF_BIND_RESULT_GENERIC_ERR = 0, + MOD_SDEF_BIND_RESULT_MEM_ERR = -1, + MOD_SDEF_BIND_RESULT_NONMANY_ERR = -2, + MOD_SDEF_BIND_RESULT_CONCAVE_ERR = -3, + MOD_SDEF_BIND_RESULT_OVERLAP_ERR = -4, +}; + +/* Infinite weight flags */ +enum { + MOD_SDEF_INFINITE_WEIGHT_ANGULAR = (1 << 0), + MOD_SDEF_INFINITE_WEIGHT_DIST_PROJ = (1 << 1), + MOD_SDEF_INFINITE_WEIGHT_DIST = (1 << 2), +}; + +static void initData(ModifierData *md) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + smd->target = NULL; + smd->verts = NULL; + smd->flags = 0; + smd->falloff = 4.0f; +} + +static void freeData(ModifierData *md) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + + if (smd->verts) { + for (int i = 0; i < smd->numverts; i++) { + if (smd->verts[i].binds) { + for (int j = 0; j < smd->verts[i].numbinds; j++) { + MEM_SAFE_FREE(smd->verts[i].binds[j].vert_inds); + MEM_SAFE_FREE(smd->verts[i].binds[j].vert_weights); + } + + MEM_freeN(smd->verts[i].binds); + } + } + + MEM_freeN(smd->verts); + smd->verts = NULL; + } +} + +static void copyData(ModifierData *md, ModifierData *target) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + SurfaceDeformModifierData *tsmd = (SurfaceDeformModifierData *)target; + + *tsmd = *smd; + + if (smd->verts) { + tsmd->verts = MEM_dupallocN(smd->verts); + + for (int i = 0; i < smd->numverts; i++) { + if (smd->verts[i].binds) { + tsmd->verts[i].binds = MEM_dupallocN(smd->verts[i].binds); + + for (int j = 0; j < smd->verts[i].numbinds; j++) { + if (smd->verts[i].binds[j].vert_inds) { + tsmd->verts[i].binds[j].vert_inds = MEM_dupallocN(smd->verts[i].binds[j].vert_inds); + } + + if (smd->verts[i].binds[j].vert_weights) { + tsmd->verts[i].binds[j].vert_weights = MEM_dupallocN(smd->verts[i].binds[j].vert_weights); + } + } + } + } + } +} + +static void foreachObjectLink(ModifierData *md, Object *ob, ObjectWalkFunc walk, void *userData) +{ + SurfaceDeformModifierData 
*smd = (SurfaceDeformModifierData *)md; + + walk(userData, ob, &smd->target, IDWALK_NOP); +} + +static void updateDepgraph(ModifierData *md, DagForest *forest, + struct Main *UNUSED(bmain), + struct Scene *UNUSED(scene), + Object *UNUSED(ob), + DagNode *obNode) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + + if (smd->target) { + DagNode *curNode = dag_get_node(forest, smd->target); + + dag_add_relation(forest, curNode, obNode, DAG_RL_DATA_DATA, "Surface Deform Modifier"); + } +} + +static void updateDepsgraph(ModifierData *md, + struct Main *UNUSED(bmain), + struct Scene *UNUSED(scene), + Object *UNUSED(ob), + struct DepsNodeHandle *node) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + if (smd->target != NULL) { + DEG_add_object_relation(node, smd->target, DEG_OB_COMP_GEOMETRY, "Surface Deform Modifier"); + } +} + +static void freeAdjacencyMap(SDefAdjacencyArray * const vert_edges, SDefAdjacency * const adj_ref, SDefEdgePolys * const edge_polys) +{ + MEM_freeN(edge_polys); + + MEM_freeN(adj_ref); + + MEM_freeN(vert_edges); +} + +static int buildAdjacencyMap(const MPoly *poly, const MEdge *edge, const MLoop * const mloop, const unsigned int numpoly, const unsigned int numedges, + SDefAdjacencyArray * const vert_edges, SDefAdjacency *adj, SDefEdgePolys * const edge_polys) +{ + const MLoop *loop; + + /* Find polygons adjacent to edges */ + for (int i = 0; i < numpoly; i++, poly++) { + loop = &mloop[poly->loopstart]; + + for (int j = 0; j < poly->totloop; j++, loop++) { + if (edge_polys[loop->e].num == 0) { + edge_polys[loop->e].polys[0] = i; + edge_polys[loop->e].polys[1] = -1; + edge_polys[loop->e].num++; + } + else if (edge_polys[loop->e].num == 1) { + edge_polys[loop->e].polys[1] = i; + edge_polys[loop->e].num++; + } + else { + return MOD_SDEF_BIND_RESULT_NONMANY_ERR; + } + } + } + + /* Find edges adjacent to vertices */ + for (int i = 0; i < numedges; i++, edge++) { + adj->next = vert_edges[edge->v1].first; 
adj->index = i; + vert_edges[edge->v1].first = adj; + vert_edges[edge->v1].num += edge_polys[i].num; + adj++; + + adj->next = vert_edges[edge->v2].first; + adj->index = i; + vert_edges[edge->v2].first = adj; + vert_edges[edge->v2].num += edge_polys[i].num; + adj++; + } + + return MOD_SDEF_BIND_RESULT_SUCCESS; +} + +BLI_INLINE void sortPolyVertsEdge(unsigned int *indices, const MLoop * const mloop, const unsigned int edge, const unsigned int num) +{ + bool found = false; + + for (int i = 0; i < num; i++) { + if (mloop[i].e == edge) { + found = true; + } + if (found) { + *indices = mloop[i].v; + indices++; + } + } + + /* Fill in remaining vertex indices that occur before the edge */ + for (int i = 0; mloop[i].e != edge; i++) { + *indices = mloop[i].v; + indices++; + } +} + +BLI_INLINE void sortPolyVertsTri(unsigned int *indices, const MLoop * const mloop, const unsigned int loopstart, const unsigned int num) +{ + for (int i = loopstart; i < num; i++) { + *indices = mloop[i].v; + indices++; + } + + for (int i = 0; i < loopstart; i++) { + *indices = mloop[i].v; + indices++; + } +} + +BLI_INLINE unsigned int nearestVert(SDefBindCalcData * const data, const float point_co[3]) +{ + BVHTreeNearest nearest = {.dist_sq = FLT_MAX, .index = -1}; + const MPoly *poly; + const MEdge *edge; + const MLoop *loop; + float t_point[3]; + float max_dist = FLT_MAX; + float dist; + unsigned int index = 0; + + mul_v3_m4v3(t_point, data->imat, point_co); + + BLI_bvhtree_find_nearest(data->treeData->tree, t_point, &nearest, data->treeData->nearest_callback, data->treeData); + + poly = &data->mpoly[data->looptri[nearest.index].poly]; + loop = &data->mloop[poly->loopstart]; + + for (int i = 0; i < poly->totloop; i++, loop++) { + edge = &data->medge[loop->e]; + dist = dist_squared_to_line_segment_v3(point_co, data->targetCos[edge->v1], data->targetCos[edge->v2]); + + if (dist < max_dist) { + max_dist = dist; + index = loop->e; + } + } + + edge = &data->medge[index]; + if 
(len_squared_v3v3(point_co, data->targetCos[edge->v1]) < len_squared_v3v3(point_co, data->targetCos[edge->v2])) { + return edge->v1; + } + else { + return edge->v2; + } +} + +BLI_INLINE int isPolyValid(const float coords[][2], const unsigned int nr) +{ + float prev_co[2]; + float curr_vec[2], prev_vec[2]; + + if (!is_poly_convex_v2(coords, nr)) { + return MOD_SDEF_BIND_RESULT_CONCAVE_ERR; + } + + copy_v2_v2(prev_co, coords[nr - 1]); + sub_v2_v2v2(prev_vec, prev_co, coords[nr - 2]); + + for (int i = 0; i < nr; i++) { + sub_v2_v2v2(curr_vec, coords[i], prev_co); + + if (len_squared_v2(curr_vec) < FLT_EPSILON) { + return MOD_SDEF_BIND_RESULT_OVERLAP_ERR; + } + + if (1.0f - dot_v2v2(prev_vec, curr_vec) < FLT_EPSILON) { + return MOD_SDEF_BIND_RESULT_CONCAVE_ERR; + } + + copy_v2_v2(prev_co, coords[i]); + copy_v2_v2(prev_vec, curr_vec); + } + + return MOD_SDEF_BIND_RESULT_SUCCESS; +} + +static void freeBindData(SDefBindWeightData * const bwdata) +{ + SDefBindPoly *bpoly = bwdata->bind_polys; + + if (bwdata->bind_polys) { + for (int i = 0; i < bwdata->numpoly; bpoly++, i++) { + MEM_SAFE_FREE(bpoly->coords); + MEM_SAFE_FREE(bpoly->coords_v2); + } + + MEM_freeN(bwdata->bind_polys); + } + + MEM_freeN(bwdata); +} + +BLI_INLINE float computeAngularWeight(const float point_angle, const float edgemid_angle) +{ + float weight; + + weight = point_angle; + weight /= edgemid_angle; + weight *= M_PI_2; + + return sinf(weight); +} + +BLI_INLINE SDefBindWeightData *computeBindWeights(SDefBindCalcData * const data, const float point_co[3]) +{ + const unsigned int nearest = nearestVert(data, point_co); + const SDefAdjacency * const vert_edges = data->vert_edges[nearest].first; + const SDefEdgePolys * const edge_polys = data->edge_polys; + + const SDefAdjacency *vedge; + const MPoly *poly; + const MLoop *loop; + + SDefBindWeightData *bwdata; + SDefBindPoly *bpoly; + + float world[3] = {0.0f, 0.0f, 1.0f}; + float avg_point_dist = 0.0f; + float tot_weight = 0.0f; + int inf_weight_flags = 0; 
+ + bwdata = MEM_callocN(sizeof(*bwdata), "SDefBindWeightData"); + if (bwdata == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return NULL; + } + + bwdata->numpoly = data->vert_edges[nearest].num / 2; + + bpoly = MEM_callocN(sizeof(*bpoly) * bwdata->numpoly, "SDefBindPoly"); + if (bpoly == NULL) { + freeBindData(bwdata); + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return NULL; + } + + bwdata->bind_polys = bpoly; + + /* Loop over all adjacent edges, and build the SDefBindPoly data for each poly adjacent to those */ + for (vedge = vert_edges; vedge; vedge = vedge->next) { + unsigned int edge_ind = vedge->index; + + for (int i = 0; i < edge_polys[edge_ind].num; i++) { + { + bpoly = bwdata->bind_polys; + + for (int j = 0; j < bwdata->numpoly; bpoly++, j++) { + /* If coords isn't allocated, we have reached the first uninitialized bpoly */ + if ((bpoly->index == edge_polys[edge_ind].polys[i]) || (!bpoly->coords)) { + break; + } + } + } + + /* Check if poly was already created by another edge or still has to be initialized */ + if (!bpoly->coords) { + float angle; + float axis[3]; + float tmp_vec_v2[2]; + int is_poly_valid; + + bpoly->index = edge_polys[edge_ind].polys[i]; + bpoly->coords = NULL; + bpoly->coords_v2 = NULL; + + /* Copy poly data */ + poly = &data->mpoly[bpoly->index]; + loop = &data->mloop[poly->loopstart]; + + bpoly->numverts = poly->totloop; + bpoly->loopstart = poly->loopstart; + + bpoly->coords = MEM_mallocN(sizeof(*bpoly->coords) * poly->totloop, "SDefBindPolyCoords"); + if (bpoly->coords == NULL) { + freeBindData(bwdata); + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return NULL; + } + + bpoly->coords_v2 = MEM_mallocN(sizeof(*bpoly->coords_v2) * poly->totloop, "SDefBindPolyCoords_v2"); + if (bpoly->coords_v2 == NULL) { + freeBindData(bwdata); + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return NULL; + } + + for (int j = 0; j < poly->totloop; j++, loop++) { + copy_v3_v3(bpoly->coords[j], data->targetCos[loop->v]); + + /* 
Find corner and edge indices within poly loop array */ + if (loop->v == nearest) { + bpoly->corner_ind = j; + bpoly->edge_vert_inds[0] = (j == 0) ? (poly->totloop - 1) : (j - 1); + bpoly->edge_vert_inds[1] = (j == poly->totloop - 1) ? (0) : (j + 1); + + bpoly->edge_inds[0] = data->mloop[poly->loopstart + bpoly->edge_vert_inds[0]].e; + bpoly->edge_inds[1] = loop->e; + } + } + + /* Compute poly's parametric data */ + mid_v3_v3_array(bpoly->centroid, bpoly->coords, poly->totloop); + normal_poly_v3(bpoly->normal, bpoly->coords, poly->totloop); + + /* Compute poly skew angle and axis */ + angle = angle_normalized_v3v3(bpoly->normal, world); + + cross_v3_v3v3(axis, bpoly->normal, world); + normalize_v3(axis); + + /* Map coords onto 2d normal plane */ + map_to_plane_axis_angle_v2_v3v3fl(bpoly->point_v2, point_co, axis, angle); + + zero_v2(bpoly->centroid_v2); + for (int j = 0; j < poly->totloop; j++) { + map_to_plane_axis_angle_v2_v3v3fl(bpoly->coords_v2[j], bpoly->coords[j], axis, angle); + madd_v2_v2fl(bpoly->centroid_v2, bpoly->coords_v2[j], 1.0f / poly->totloop); + } + + is_poly_valid = isPolyValid(bpoly->coords_v2, poly->totloop); + + if (is_poly_valid != MOD_SDEF_BIND_RESULT_SUCCESS) { + freeBindData(bwdata); + data->success = is_poly_valid; + return NULL; + } + + bpoly->inside = isect_point_poly_v2(bpoly->point_v2, bpoly->coords_v2, poly->totloop, false); + + /* Initialize weight components */ + bpoly->weight_angular = 1.0f; + bpoly->weight_dist_proj = len_v2v2(bpoly->centroid_v2, bpoly->point_v2); + bpoly->weight_dist = len_v3v3(bpoly->centroid, point_co); + + avg_point_dist += bpoly->weight_dist; + + /* Compute centroid to mid-edge vectors */ + mid_v2_v2v2(bpoly->cent_edgemid_vecs_v2[0], + bpoly->coords_v2[bpoly->edge_vert_inds[0]], + bpoly->coords_v2[bpoly->corner_ind]); + + mid_v2_v2v2(bpoly->cent_edgemid_vecs_v2[1], + bpoly->coords_v2[bpoly->edge_vert_inds[1]], + bpoly->coords_v2[bpoly->corner_ind]); + + sub_v2_v2(bpoly->cent_edgemid_vecs_v2[0], 
bpoly->centroid_v2); + sub_v2_v2(bpoly->cent_edgemid_vecs_v2[1], bpoly->centroid_v2); + + /* Compute poly scales with respect to mid-edges, and normalize the vectors */ + bpoly->scales[0] = normalize_v2(bpoly->cent_edgemid_vecs_v2[0]); + bpoly->scales[1] = normalize_v2(bpoly->cent_edgemid_vecs_v2[1]); + + /* Compute the required polygon angles */ + bpoly->edgemid_angle = angle_normalized_v2v2(bpoly->cent_edgemid_vecs_v2[0], bpoly->cent_edgemid_vecs_v2[1]); + + sub_v2_v2v2(tmp_vec_v2, bpoly->coords_v2[bpoly->corner_ind], bpoly->centroid_v2); + normalize_v2(tmp_vec_v2); + + bpoly->corner_edgemid_angles[0] = angle_normalized_v2v2(tmp_vec_v2, bpoly->cent_edgemid_vecs_v2[0]); + bpoly->corner_edgemid_angles[1] = angle_normalized_v2v2(tmp_vec_v2, bpoly->cent_edgemid_vecs_v2[1]); + + /* Check for infinite weights, and compute angular data otherwise */ + if (bpoly->weight_dist < FLT_EPSILON) { + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST_PROJ; + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST; + } + else if (bpoly->weight_dist_proj < FLT_EPSILON) { + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST_PROJ; + } + else { + float cent_point_vec[2]; + + sub_v2_v2v2(cent_point_vec, bpoly->point_v2, bpoly->centroid_v2); + normalize_v2(cent_point_vec); + + bpoly->point_edgemid_angles[0] = angle_normalized_v2v2(cent_point_vec, bpoly->cent_edgemid_vecs_v2[0]); + bpoly->point_edgemid_angles[1] = angle_normalized_v2v2(cent_point_vec, bpoly->cent_edgemid_vecs_v2[1]); + } + } + } + + avg_point_dist /= bwdata->numpoly; + + /* If weights 1 and 2 are not infinite, loop over all adjacent edges again, + * and build adjacency dependent angle data (depends on all polygons having been computed) */ + if (!inf_weight_flags) { + for (vedge = vert_edges; vedge; vedge = vedge->next) { + SDefBindPoly *bpolys[2]; + const SDefEdgePolys *epolys; + float ang_weights[2]; + unsigned int edge_ind = vedge->index; + unsigned int edge_on_poly[2]; + + epolys = &edge_polys[edge_ind]; + + /* Find bind 
polys corresponding to the edge's adjacent polys */ + bpoly = bwdata->bind_polys; + + for (int i = 0, j = 0; (i < bwdata->numpoly) && (j < epolys->num); bpoly++, i++) { + if (ELEM(bpoly->index, epolys->polys[0], epolys->polys[1])) { + bpolys[j] = bpoly; + + if (bpoly->edge_inds[0] == edge_ind) { + edge_on_poly[j] = 0; + } + else { + edge_on_poly[j] = 1; + } + + j++; + } + } + + /* Compute angular weight component */ + if (epolys->num == 1) { + ang_weights[0] = computeAngularWeight(bpolys[0]->point_edgemid_angles[edge_on_poly[0]], bpolys[0]->edgemid_angle); + bpolys[0]->weight_angular *= ang_weights[0] * ang_weights[0]; + } + else if (epolys->num == 2) { + ang_weights[0] = computeAngularWeight(bpolys[0]->point_edgemid_angles[edge_on_poly[0]], bpolys[0]->edgemid_angle); + ang_weights[1] = computeAngularWeight(bpolys[1]->point_edgemid_angles[edge_on_poly[1]], bpolys[1]->edgemid_angle); + + bpolys[0]->weight_angular *= ang_weights[0] * ang_weights[1]; + bpolys[1]->weight_angular *= ang_weights[0] * ang_weights[1]; + } + } + } + + /* Compute scalings and falloff. + * Scale all weights if no infinite weight is found, + * scale only unprojected weight if projected weight is infinite, + * scale none if both are infinite. 
*/ + if (!inf_weight_flags) { + bpoly = bwdata->bind_polys; + + for (int i = 0; i < bwdata->numpoly; bpoly++, i++) { + float corner_angle_weights[2]; + float scale_weight, sqr, inv_sqr; + + corner_angle_weights[0] = bpoly->point_edgemid_angles[0] / bpoly->corner_edgemid_angles[0]; + corner_angle_weights[1] = bpoly->point_edgemid_angles[1] / bpoly->corner_edgemid_angles[1]; + + if (isnan(corner_angle_weights[0]) || isnan(corner_angle_weights[1])) { + freeBindData(bwdata); + data->success = MOD_SDEF_BIND_RESULT_GENERIC_ERR; + return NULL; + } + + /* Find which edge the point is closer to */ + if (corner_angle_weights[0] < corner_angle_weights[1]) { + bpoly->dominant_edge = 0; + bpoly->dominant_angle_weight = corner_angle_weights[0]; + } + else { + bpoly->dominant_edge = 1; + bpoly->dominant_angle_weight = corner_angle_weights[1]; + } + + bpoly->dominant_angle_weight = sinf(bpoly->dominant_angle_weight * M_PI_2); + + /* Compute quadratic angular scale interpolation weight */ + scale_weight = bpoly->point_edgemid_angles[bpoly->dominant_edge] / bpoly->edgemid_angle; + scale_weight /= scale_weight + (bpoly->point_edgemid_angles[!bpoly->dominant_edge] / bpoly->edgemid_angle); + + sqr = scale_weight * scale_weight; + inv_sqr = 1.0f - scale_weight; + inv_sqr *= inv_sqr; + scale_weight = sqr / (sqr + inv_sqr); + + /* Compute interpolated scale (no longer need the individual scales, + * so simply storing the result over the scale in index zero) */ + bpoly->scales[0] = bpoly->scales[bpoly->dominant_edge] * (1.0f - scale_weight) + + bpoly->scales[!bpoly->dominant_edge] * scale_weight; + + /* Scale the point distance weights, and introduce falloff */ + bpoly->weight_dist_proj /= bpoly->scales[0]; + bpoly->weight_dist_proj = powf(bpoly->weight_dist_proj, data->falloff); + + bpoly->weight_dist /= avg_point_dist; + bpoly->weight_dist = powf(bpoly->weight_dist, data->falloff); + + /* Re-check for infinite weights, now that all scalings and interpolations are computed */ + if 
(bpoly->weight_dist < FLT_EPSILON) { + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST_PROJ; + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST; + } + else if (bpoly->weight_dist_proj < FLT_EPSILON) { + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST_PROJ; + } + else if (bpoly->weight_angular < FLT_EPSILON) { + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_ANGULAR; + } + } + } + else if (!(inf_weight_flags & MOD_SDEF_INFINITE_WEIGHT_DIST)) { + bpoly = bwdata->bind_polys; + + for (int i = 0; i < bwdata->numpoly; bpoly++, i++) { + /* Scale the point distance weight by average point distance, and introduce falloff */ + bpoly->weight_dist /= avg_point_dist; + bpoly->weight_dist = powf(bpoly->weight_dist, data->falloff); + + /* Re-check for infinite weights, now that all scalings and interpolations are computed */ + if (bpoly->weight_dist < FLT_EPSILON) { + inf_weight_flags |= MOD_SDEF_INFINITE_WEIGHT_DIST; + } + } + } + + /* Final loop, to compute actual weights */ + bpoly = bwdata->bind_polys; + + for (int i = 0; i < bwdata->numpoly; bpoly++, i++) { + /* Weight computation from components */ + if (inf_weight_flags & MOD_SDEF_INFINITE_WEIGHT_DIST) { + bpoly->weight = bpoly->weight_dist < FLT_EPSILON ? 1.0f : 0.0f; + } + else if (inf_weight_flags & MOD_SDEF_INFINITE_WEIGHT_DIST_PROJ) { + bpoly->weight = bpoly->weight_dist_proj < FLT_EPSILON ? + 1.0f / bpoly->weight_dist : 0.0f; + } + else if (inf_weight_flags & MOD_SDEF_INFINITE_WEIGHT_ANGULAR) { + bpoly->weight = bpoly->weight_angular < FLT_EPSILON ? 
+ 1.0f / bpoly->weight_dist_proj / bpoly->weight_dist : 0.0f; + } + else { + bpoly->weight = 1.0f / bpoly->weight_angular / + bpoly->weight_dist_proj / + bpoly->weight_dist; + } + + tot_weight += bpoly->weight; + } + + bpoly = bwdata->bind_polys; + + for (int i = 0; i < bwdata->numpoly; bpoly++, i++) { + bpoly->weight /= tot_weight; + + /* Evaluate if this poly is relevant to bind */ + /* Even though the weights should add up to 1.0, + * the losses of weights smaller than epsilon here + * should be negligible... */ + if (bpoly->weight >= FLT_EPSILON) { + if (bpoly->inside) { + bwdata->numbinds += 1; + } + else { + if (bpoly->dominant_angle_weight < FLT_EPSILON || 1.0f - bpoly->dominant_angle_weight < FLT_EPSILON) { + bwdata->numbinds += 1; + } + else { + bwdata->numbinds += 2; + } + } + } + } + + return bwdata; +} + +BLI_INLINE float computeNormalDisplacement(const float point_co[3], const float point_co_proj[3], const float normal[3]) +{ + float disp_vec[3]; + float normal_dist; + + sub_v3_v3v3(disp_vec, point_co, point_co_proj); + normal_dist = len_v3(disp_vec); + + if (dot_v3v3(disp_vec, normal) < 0) { + normal_dist *= -1; + } + + return normal_dist; +} + +static void bindVert(void *userdata, void *UNUSED(userdata_chunk), const int index, const int UNUSED(threadid)) +{ + SDefBindCalcData * const data = (SDefBindCalcData *)userdata; + float point_co[3]; + float point_co_proj[3]; + + SDefBindWeightData *bwdata; + SDefVert *sdvert = data->bind_verts + index; + SDefBindPoly *bpoly; + SDefBind *sdbind; + + if (data->success != MOD_SDEF_BIND_RESULT_SUCCESS) { + sdvert->binds = NULL; + sdvert->numbinds = 0; + return; + } + + copy_v3_v3(point_co, data->vertexCos[index]); + bwdata = computeBindWeights(data, point_co); + + if (bwdata == NULL) { + sdvert->binds = NULL; + sdvert->numbinds = 0; + return; + } + + sdvert->binds = MEM_callocN(sizeof(*sdvert->binds) * bwdata->numbinds, "SDefVertBindData"); + if (sdvert->binds == NULL) { + data->success = 
MOD_SDEF_BIND_RESULT_MEM_ERR; + sdvert->numbinds = 0; + return; + } + + sdvert->numbinds = bwdata->numbinds; + + sdbind = sdvert->binds; + + bpoly = bwdata->bind_polys; + + for (int i = 0; i < bwdata->numbinds; bpoly++) { + if (bpoly->weight >= FLT_EPSILON) { + if (bpoly->inside) { + const MLoop *loop = &data->mloop[bpoly->loopstart]; + + sdbind->influence = bpoly->weight; + sdbind->numverts = bpoly->numverts; + + sdbind->mode = MOD_SDEF_MODE_NGON; + sdbind->vert_weights = MEM_mallocN(sizeof(*sdbind->vert_weights) * bpoly->numverts, "SDefNgonVertWeights"); + if (sdbind->vert_weights == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return; + } + + sdbind->vert_inds = MEM_mallocN(sizeof(*sdbind->vert_inds) * bpoly->numverts, "SDefNgonVertInds"); + if (sdbind->vert_inds == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return; + } + + interp_weights_poly_v2(sdbind->vert_weights, bpoly->coords_v2, bpoly->numverts, bpoly->point_v2); + + /* Reproject vert based on weights and original poly verts, to reintroduce poly non-planarity */ + zero_v3(point_co_proj); + for (int j = 0; j < bpoly->numverts; j++, loop++) { + madd_v3_v3fl(point_co_proj, bpoly->coords[j], sdbind->vert_weights[j]); + sdbind->vert_inds[j] = loop->v; + } + + sdbind->normal_dist = computeNormalDisplacement(point_co, point_co_proj, bpoly->normal); + + sdbind++; + i++; + } + else { + float tmp_vec[3]; + float cent[3], norm[3]; + float v1[3], v2[3], v3[3]; + + if (1.0f - bpoly->dominant_angle_weight >= FLT_EPSILON) { + sdbind->influence = bpoly->weight * (1.0f - bpoly->dominant_angle_weight); + sdbind->numverts = bpoly->numverts; + + sdbind->mode = MOD_SDEF_MODE_CENTROID; + sdbind->vert_weights = MEM_mallocN(sizeof(*sdbind->vert_weights) * 3, "SDefCentVertWeights"); + if (sdbind->vert_weights == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return; + } + + sdbind->vert_inds = MEM_mallocN(sizeof(*sdbind->vert_inds) * bpoly->numverts, "SDefCentVertInds"); + if 
(sdbind->vert_inds == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return; + } + + sortPolyVertsEdge(sdbind->vert_inds, &data->mloop[bpoly->loopstart], + bpoly->edge_inds[bpoly->dominant_edge], bpoly->numverts); + + copy_v3_v3(v1, data->targetCos[sdbind->vert_inds[0]]); + copy_v3_v3(v2, data->targetCos[sdbind->vert_inds[1]]); + copy_v3_v3(v3, bpoly->centroid); + + mid_v3_v3v3v3(cent, v1, v2, v3); + normal_tri_v3(norm, v1, v2, v3); + + add_v3_v3v3(tmp_vec, point_co, bpoly->normal); + + /* We are sure the line is not parallel to the plane. + * Checking return value just to avoid warning... */ + if (!isect_line_plane_v3(point_co_proj, point_co, tmp_vec, cent, norm)) { + BLI_assert(false); + } + + interp_weights_tri_v3(sdbind->vert_weights, v1, v2, v3, point_co_proj); + + sdbind->normal_dist = computeNormalDisplacement(point_co, point_co_proj, bpoly->normal); + + sdbind++; + i++; + } + + if (bpoly->dominant_angle_weight >= FLT_EPSILON) { + sdbind->influence = bpoly->weight * bpoly->dominant_angle_weight; + sdbind->numverts = bpoly->numverts; + + sdbind->mode = MOD_SDEF_MODE_LOOPTRI; + sdbind->vert_weights = MEM_mallocN(sizeof(*sdbind->vert_weights) * 3, "SDefTriVertWeights"); + if (sdbind->vert_weights == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return; + } + + sdbind->vert_inds = MEM_mallocN(sizeof(*sdbind->vert_inds) * bpoly->numverts, "SDefTriVertInds"); + if (sdbind->vert_inds == NULL) { + data->success = MOD_SDEF_BIND_RESULT_MEM_ERR; + return; + } + + sortPolyVertsTri(sdbind->vert_inds, &data->mloop[bpoly->loopstart], bpoly->edge_vert_inds[0], bpoly->numverts); + + copy_v3_v3(v1, data->targetCos[sdbind->vert_inds[0]]); + copy_v3_v3(v2, data->targetCos[sdbind->vert_inds[1]]); + copy_v3_v3(v3, data->targetCos[sdbind->vert_inds[2]]); + + mid_v3_v3v3v3(cent, v1, v2, v3); + normal_tri_v3(norm, v1, v2, v3); + + add_v3_v3v3(tmp_vec, point_co, bpoly->normal); + + /* We are sure the line is not parallel to the plane. 
+ * Checking return value just to avoid warning... */ + if (!isect_line_plane_v3(point_co_proj, point_co, tmp_vec, cent, norm)) { + BLI_assert(false); + } + + interp_weights_tri_v3(sdbind->vert_weights, v1, v2, v3, point_co_proj); + + sdbind->normal_dist = computeNormalDisplacement(point_co, point_co_proj, bpoly->normal); + + sdbind++; + i++; + } + } + } + } + + freeBindData(bwdata); +} + +static bool surfacedeformBind(SurfaceDeformModifierData *smd, float (*vertexCos)[3], + unsigned int numverts, unsigned int tnumpoly, unsigned int tnumverts, DerivedMesh *tdm) +{ + BVHTreeFromMesh treeData = {NULL}; + const MVert *mvert = tdm->getVertArray(tdm); + const MPoly *mpoly = tdm->getPolyArray(tdm); + const MEdge *medge = tdm->getEdgeArray(tdm); + const MLoop *mloop = tdm->getLoopArray(tdm); + unsigned int tnumedges = tdm->getNumEdges(tdm); + int adj_result; + SDefAdjacencyArray *vert_edges; + SDefAdjacency *adj_array; + SDefEdgePolys *edge_polys; + + vert_edges = MEM_callocN(sizeof(*vert_edges) * tnumverts, "SDefVertEdgeMap"); + if (vert_edges == NULL) { + modifier_setError((ModifierData *)smd, "Out of memory"); + return false; + } + + adj_array = MEM_mallocN(sizeof(*adj_array) * tnumedges * 2, "SDefVertEdge"); + if (adj_array == NULL) { + modifier_setError((ModifierData *)smd, "Out of memory"); + MEM_freeN(vert_edges); + return false; + } + + edge_polys = MEM_callocN(sizeof(*edge_polys) * tnumedges, "SDefEdgeFaceMap"); + if (edge_polys == NULL) { + modifier_setError((ModifierData *)smd, "Out of memory"); + MEM_freeN(vert_edges); + MEM_freeN(adj_array); + return false; + } + + smd->verts = MEM_mallocN(sizeof(*smd->verts) * numverts, "SDefBindVerts"); + if (smd->verts == NULL) { + modifier_setError((ModifierData *)smd, "Out of memory"); + freeAdjacencyMap(vert_edges, adj_array, edge_polys); + return false; + } + + bvhtree_from_mesh_looptri(&treeData, tdm, 0.0, 2, 6); + if (treeData.tree == NULL) { + modifier_setError((ModifierData *)smd, "Out of memory"); + 
freeAdjacencyMap(vert_edges, adj_array, edge_polys); + MEM_freeN(smd->verts); + smd->verts = NULL; + return false; + } + + adj_result = buildAdjacencyMap(mpoly, medge, mloop, tnumpoly, tnumedges, vert_edges, adj_array, edge_polys); + + if (adj_result == MOD_SDEF_BIND_RESULT_NONMANY_ERR) { + modifier_setError((ModifierData *)smd, "Target has edges with more than two polys"); + freeAdjacencyMap(vert_edges, adj_array, edge_polys); + free_bvhtree_from_mesh(&treeData); + MEM_freeN(smd->verts); + smd->verts = NULL; + return false; + } + + smd->numverts = numverts; + smd->numpoly = tnumpoly; + + SDefBindCalcData data = {.treeData = &treeData, + .vert_edges = vert_edges, + .edge_polys = edge_polys, + .mpoly = mpoly, + .medge = medge, + .mloop = mloop, + .looptri = tdm->getLoopTriArray(tdm), + .targetCos = MEM_mallocN(sizeof(float[3]) * tnumverts, "SDefTargetBindVertArray"), + .bind_verts = smd->verts, + .vertexCos = vertexCos, + .falloff = smd->falloff, + .success = MOD_SDEF_BIND_RESULT_SUCCESS}; + + if (data.targetCos == NULL) { + modifier_setError((ModifierData *)smd, "Out of memory"); + freeData((ModifierData *)smd); + return false; + } + + invert_m4_m4(data.imat, smd->mat); + + for (int i = 0; i < tnumverts; i++) { + mul_v3_m4v3(data.targetCos[i], smd->mat, mvert[i].co); + } + + BLI_task_parallel_range_ex(0, numverts, &data, NULL, 0, bindVert, + numverts > 10000, false); + + MEM_freeN(data.targetCos); + + if (data.success == MOD_SDEF_BIND_RESULT_MEM_ERR) { + modifier_setError((ModifierData *)smd, "Out of memory"); + freeData((ModifierData *)smd); + } + else if (data.success == MOD_SDEF_BIND_RESULT_NONMANY_ERR) { + modifier_setError((ModifierData *)smd, "Target has edges with more than two polys"); + freeData((ModifierData *)smd); + } + else if (data.success == MOD_SDEF_BIND_RESULT_CONCAVE_ERR) { + modifier_setError((ModifierData *)smd, "Target contains concave polys"); + freeData((ModifierData *)smd); + } + else if (data.success == MOD_SDEF_BIND_RESULT_OVERLAP_ERR) { + 
modifier_setError((ModifierData *)smd, "Target contains overlapping verts"); + freeData((ModifierData *)smd); + } + else if (data.success == MOD_SDEF_BIND_RESULT_GENERIC_ERR) { + /* I know this message is vague, but I could not think of a way + * to explain this with a reasonably sized message. + * Though it shouldn't really matter all that much, + * because this is very unlikely to occur */ + modifier_setError((ModifierData *)smd, "Target contains invalid polys"); + freeData((ModifierData *)smd); + } + + freeAdjacencyMap(vert_edges, adj_array, edge_polys); + free_bvhtree_from_mesh(&treeData); + + return data.success == 1; +} + +static void deformVert(void *userdata, void *UNUSED(userdata_chunk), const int index, const int UNUSED(threadid)) +{ + const SDefDeformData * const data = (SDefDeformData *)userdata; + const SDefBind *sdbind = data->bind_verts[index].binds; + float * const vertexCos = data->vertexCos[index]; + float norm[3], temp[3]; + + zero_v3(vertexCos); + + for (int j = 0; j < data->bind_verts[index].numbinds; j++, sdbind++) { + /* Mode-generic operations (allocate poly coordinates) */ + float (*coords)[3] = MEM_mallocN(sizeof(*coords) * sdbind->numverts, "SDefDoPolyCoords"); + + for (int k = 0; k < sdbind->numverts; k++) { + copy_v3_v3(coords[k], data->targetCos[sdbind->vert_inds[k]]); + } + + normal_poly_v3(norm, coords, sdbind->numverts); + zero_v3(temp); + + /* ---------- looptri mode ---------- */ + if (sdbind->mode == MOD_SDEF_MODE_LOOPTRI) { + madd_v3_v3fl(temp, data->targetCos[sdbind->vert_inds[0]], sdbind->vert_weights[0]); + madd_v3_v3fl(temp, data->targetCos[sdbind->vert_inds[1]], sdbind->vert_weights[1]); + madd_v3_v3fl(temp, data->targetCos[sdbind->vert_inds[2]], sdbind->vert_weights[2]); + } + else { + /* ---------- ngon mode ---------- */ + if (sdbind->mode == MOD_SDEF_MODE_NGON) { + for (int k = 0; k < sdbind->numverts; k++) { + madd_v3_v3fl(temp, coords[k], sdbind->vert_weights[k]); + } + } + + /* ---------- centroid mode ---------- */ 
+ else if (sdbind->mode == MOD_SDEF_MODE_CENTROID) { + float cent[3]; + mid_v3_v3_array(cent, coords, sdbind->numverts); + + madd_v3_v3fl(temp, data->targetCos[sdbind->vert_inds[0]], sdbind->vert_weights[0]); + madd_v3_v3fl(temp, data->targetCos[sdbind->vert_inds[1]], sdbind->vert_weights[1]); + madd_v3_v3fl(temp, cent, sdbind->vert_weights[2]); + } + } + + MEM_freeN(coords); + + /* Apply normal offset (generic for all modes) */ + madd_v3_v3fl(temp, norm, sdbind->normal_dist); + + madd_v3_v3fl(vertexCos, temp, sdbind->influence); + } +} + +static void surfacedeformModifier_do(ModifierData *md, float (*vertexCos)[3], unsigned int numverts, Object *ob) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + DerivedMesh *tdm; + unsigned int tnumverts, tnumpoly; + + /* Exit function if bind flag is not set (free bind data if any) */ + if (!(smd->flags & MOD_SDEF_BIND)) { + freeData(md); + return; + } + + /* Handle target mesh both in and out of edit mode */ + if (smd->target == md->scene->obedit) { + BMEditMesh *em = BKE_editmesh_from_object(smd->target); + tdm = em->derivedFinal; + } + else { + tdm = smd->target->derivedFinal; + } + + tnumverts = tdm->getNumVerts(tdm); + tnumpoly = tdm->getNumPolys(tdm); + + /* If not bound, execute bind */ + if (!(smd->verts)) { + float tmp_mat[4][4]; + + invert_m4_m4(tmp_mat, ob->obmat); + mul_m4_m4m4(smd->mat, tmp_mat, smd->target->obmat); + + if (!surfacedeformBind(smd, vertexCos, numverts, tnumpoly, tnumverts, tdm)) { + smd->flags &= ~MOD_SDEF_BIND; + return; + } + } + + /* Poly count checks */ + if (smd->numverts != numverts) { + modifier_setError(md, "Verts changed from %u to %u", smd->numverts, numverts); + tdm->release(tdm); + return; + } + else if (smd->numpoly != tnumpoly) { + modifier_setError(md, "Target polygons changed from %u to %u", smd->numpoly, tnumpoly); + tdm->release(tdm); + return; + } + + /* Actual vertex location update starts here */ + SDefDeformData data = {.bind_verts = smd->verts, + 
.targetCos = MEM_mallocN(sizeof(float[3]) * tnumverts, "SDefTargetVertArray"), + .vertexCos = vertexCos}; + + if (data.targetCos != NULL) { + bool tdm_vert_alloc; + const MVert * const mvert = DM_get_vert_array(tdm, &tdm_vert_alloc); + + for (int i = 0; i < tnumverts; i++) { + mul_v3_m4v3(data.targetCos[i], smd->mat, mvert[i].co); + } + + BLI_task_parallel_range_ex(0, numverts, &data, NULL, 0, deformVert, + numverts > 10000, false); + + if (tdm_vert_alloc) { + MEM_freeN((void *)mvert); + } + + MEM_freeN(data.targetCos); + } + + tdm->release(tdm); +} + +static void deformVerts(ModifierData *md, Object *ob, + DerivedMesh *UNUSED(derivedData), + float (*vertexCos)[3], int numVerts, + ModifierApplyFlag UNUSED(flag)) +{ + surfacedeformModifier_do(md, vertexCos, numVerts, ob); +} + +static void deformVertsEM(ModifierData *md, Object *ob, + struct BMEditMesh *UNUSED(editData), + DerivedMesh *UNUSED(derivedData), + float (*vertexCos)[3], int numVerts) +{ + surfacedeformModifier_do(md, vertexCos, numVerts, ob); +} + +static bool isDisabled(ModifierData *md, int UNUSED(useRenderParams)) +{ + SurfaceDeformModifierData *smd = (SurfaceDeformModifierData *)md; + + return !smd->target; +} + +ModifierTypeInfo modifierType_SurfaceDeform = { + /* name */ "Surface Deform", + /* structName */ "SurfaceDeformModifierData", + /* structSize */ sizeof(SurfaceDeformModifierData), + /* type */ eModifierTypeType_OnlyDeform, + /* flags */ eModifierTypeFlag_AcceptsMesh | + eModifierTypeFlag_SupportsEditmode, + + /* copyData */ copyData, + /* deformVerts */ deformVerts, + /* deformMatrices */ NULL, + /* deformVertsEM */ deformVertsEM, + /* deformMatricesEM */ NULL, + /* applyModifier */ NULL, + /* applyModifierEM */ NULL, + /* initData */ initData, + /* requiredDataMask */ NULL, + /* freeData */ freeData, + /* isDisabled */ isDisabled, + /* updateDepgraph */ updateDepgraph, + /* updateDepsgraph */ updateDepsgraph, + /* dependsOnTime */ NULL, + /* dependsOnNormals */ NULL, + /* foreachObjectLink 
*/ foreachObjectLink, + /* foreachIDLink */ NULL, + /* foreachTexLink */ NULL, +}; diff --git a/source/blender/modifiers/intern/MOD_util.c b/source/blender/modifiers/intern/MOD_util.c index 93414562ccf..ded1f0b77e6 100644 --- a/source/blender/modifiers/intern/MOD_util.c +++ b/source/blender/modifiers/intern/MOD_util.c @@ -287,5 +287,6 @@ void modifier_type_init(ModifierTypeInfo *types[]) INIT_TYPE(NormalEdit); INIT_TYPE(CorrectiveSmooth); INIT_TYPE(MeshSequenceCache); + INIT_TYPE(SurfaceDeform); #undef INIT_TYPE } diff --git a/source/blender/nodes/composite/nodes/node_composite_glare.c b/source/blender/nodes/composite/nodes/node_composite_glare.c index c512ea49586..76020e55463 100644 --- a/source/blender/nodes/composite/nodes/node_composite_glare.c +++ b/source/blender/nodes/composite/nodes/node_composite_glare.c @@ -50,7 +50,8 @@ static void node_composit_init_glare(bNodeTree *UNUSED(ntree), bNode *node) ndg->colmod = 0.25; ndg->mix = 0; ndg->threshold = 1; - ndg->angle = 4; + ndg->star_45 = true; + ndg->streaks = 4; ndg->angle_ofs = 0.0f; ndg->fade = 0.9; ndg->size = 8; diff --git a/source/blender/nodes/shader/nodes/node_shader_fresnel.c b/source/blender/nodes/shader/nodes/node_shader_fresnel.c index d5e11795fc0..5a9e33a4053 100644 --- a/source/blender/nodes/shader/nodes/node_shader_fresnel.c +++ b/source/blender/nodes/shader/nodes/node_shader_fresnel.c @@ -64,10 +64,11 @@ static void node_shader_exec_fresnel(void *data, int UNUSED(thread), bNode *UNUS copy_v3_v3(n, shi->vn); } - if(shi->use_world_space_shading) + if (shi->use_world_space_shading) { mul_mat3_m4_v3((float (*)[4])RE_render_current_get_matrix(RE_VIEW_MATRIX), n); + } - out[0]->vec[0] = RE_fresnel_dielectric(shi->view, n, shi->flippednor ? 1/eta : eta); + out[0]->vec[0] = RE_fresnel_dielectric(shi->view, n, shi->flippednor ? 
1 / eta : eta); } /* node type definition */ diff --git a/source/blender/nodes/shader/nodes/node_shader_layer_weight.c b/source/blender/nodes/shader/nodes/node_shader_layer_weight.c index 90e2625b961..a0b2408a7bb 100644 --- a/source/blender/nodes/shader/nodes/node_shader_layer_weight.c +++ b/source/blender/nodes/shader/nodes/node_shader_layer_weight.c @@ -69,7 +69,7 @@ static void node_shader_exec_layer_weight(void *data, int UNUSED(thread), bNode if (shi->use_world_space_shading) mul_mat3_m4_v3((float (*)[4])RE_render_current_get_matrix(RE_VIEW_MATRIX), n); - out[0]->vec[0] = RE_fresnel_dielectric(shi->view, n, shi->flippednor ? eta : 1/eta); + out[0]->vec[0] = RE_fresnel_dielectric(shi->view, n, shi->flippednor ? eta : 1 / eta); float facing = fabs(dot_v3v3(shi->view, n)); if (blend != 0.5) { diff --git a/source/blender/nodes/shader/nodes/node_shader_tex_brick.c b/source/blender/nodes/shader/nodes/node_shader_tex_brick.c index 0be47c4f751..1dfebc45d60 100644 --- a/source/blender/nodes/shader/nodes/node_shader_tex_brick.c +++ b/source/blender/nodes/shader/nodes/node_shader_tex_brick.c @@ -64,7 +64,7 @@ static void node_shader_init_tex_brick(bNodeTree *UNUSED(ntree), bNode *node) for (bNodeSocket *sock = node->inputs.first; sock; sock = sock->next) { if (STREQ(sock->name, "Mortar Smooth")) { - ((bNodeSocketValueFloat*)sock->default_value)->value = 0.1f; + ((bNodeSocketValueFloat *)sock->default_value)->value = 0.1f; } } } diff --git a/source/blender/python/intern/gpu_offscreen.c b/source/blender/python/intern/gpu_offscreen.c index c4863b2a92f..7711ce18bd0 100644 --- a/source/blender/python/intern/gpu_offscreen.c +++ b/source/blender/python/intern/gpu_offscreen.c @@ -202,7 +202,7 @@ static PyObject *pygpu_offscreen_draw_view3d(BPy_GPUOffScreen *self, PyObject *a ARegion *ar; GPUFX *fx; GPUFXSettings fx_settings; - void *rv3d_mats; + struct RV3DMatrixStore *rv3d_mats; BPY_GPU_OFFSCREEN_CHECK_OBJ(self); diff --git a/source/blender/render/CMakeLists.txt 
b/source/blender/render/CMakeLists.txt index 9e40ab02ee4..569b207c966 100644 --- a/source/blender/render/CMakeLists.txt +++ b/source/blender/render/CMakeLists.txt @@ -35,6 +35,7 @@ set(INC ../makesdna ../makesrna ../physics + ../../../intern/atomic ../../../intern/guardedalloc ../../../intern/mikktspace ../../../intern/smoke/extern diff --git a/source/blender/render/intern/source/pointdensity.c b/source/blender/render/intern/source/pointdensity.c index a03ea9cb896..fb047aad897 100644 --- a/source/blender/render/intern/source/pointdensity.c +++ b/source/blender/render/intern/source/pointdensity.c @@ -983,11 +983,12 @@ void RE_point_density_minmax( } else { float radius[3] = {pd->radius, pd->radius, pd->radius}; - float *loc, *size; + BoundBox *bb = BKE_object_boundbox_get(object); - if (BKE_object_obdata_texspace_get(pd->object, NULL, &loc, &size, NULL)) { - sub_v3_v3v3(r_min, loc, size); - add_v3_v3v3(r_max, loc, size); + if (bb != NULL) { + BLI_assert((bb->flag & BOUNDBOX_DIRTY) == 0); + copy_v3_v3(r_min, bb->vec[0]); + copy_v3_v3(r_max, bb->vec[6]); /* Adjust texture space to include density points on the boundaries. 
*/ sub_v3_v3(r_min, radius); add_v3_v3(r_max, radius); diff --git a/source/blender/render/intern/source/volume_precache.c b/source/blender/render/intern/source/volume_precache.c index 5377d0eba00..752a9df0b79 100644 --- a/source/blender/render/intern/source/volume_precache.c +++ b/source/blender/render/intern/source/volume_precache.c @@ -60,6 +60,8 @@ #include "volumetric.h" #include "volume_precache.h" +#include "atomic_ops.h" + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */ @@ -509,7 +511,8 @@ static void *vol_precache_part_test(void *data) */ typedef struct VolPrecacheState { double lasttime; - int totparts; + unsigned int doneparts; + unsigned int totparts; } VolPrecacheState; static void vol_precache_part(TaskPool * __restrict pool, void *taskdata, int UNUSED(threadid)) @@ -574,13 +577,15 @@ static void vol_precache_part(TaskPool * __restrict pool, void *taskdata, int UN } } + unsigned int doneparts = atomic_add_and_fetch_u(&state->doneparts, 1); + time = PIL_check_seconds_timer(); if (time - state->lasttime > 1.0) { ThreadMutex *mutex = BLI_task_pool_user_mutex(pool); if (BLI_mutex_trylock(mutex)) { char str[64]; - float ratio = (float)BLI_task_pool_tasks_done(pool)/(float)state->totparts; + float ratio = (float)doneparts/(float)state->totparts; BLI_snprintf(str, sizeof(str), IFACE_("Precaching volume: %d%%"), (int)(100.0f * ratio)); re->i.infostr = str; re->stats_draw(re->sdh, &re->i); @@ -631,6 +636,7 @@ static void precache_launch_parts(Render *re, RayObject *tree, ShadeInput *shi, /* setup task scheduler */ memset(&state, 0, sizeof(state)); + state.doneparts = 0; state.totparts = parts[0]*parts[1]*parts[2]; state.lasttime = PIL_check_seconds_timer(); diff --git a/source/blender/windowmanager/intern/wm_event_system.c b/source/blender/windowmanager/intern/wm_event_system.c index d2b0acd836b..eba132062c9 100644 --- 
a/source/blender/windowmanager/intern/wm_event_system.c +++ b/source/blender/windowmanager/intern/wm_event_system.c @@ -3179,6 +3179,8 @@ void wm_event_add_ghostevent(wmWindowManager *wm, wmWindow *win, int type, int U GHOST_TEventCursorData *cd = customdata; copy_v2_v2_int(&event.x, &cd->x); + wm_stereo3d_mouse_offset_apply(win, &event.x); + event.type = MOUSEMOVE; wm_event_add_mousemove(win, &event); copy_v2_v2_int(&evt->x, &event.x); diff --git a/source/blender/windowmanager/intern/wm_init_exit.c b/source/blender/windowmanager/intern/wm_init_exit.c index c11c398c616..4b2369a1a7c 100644 --- a/source/blender/windowmanager/intern/wm_init_exit.c +++ b/source/blender/windowmanager/intern/wm_init_exit.c @@ -444,8 +444,6 @@ void WM_exit_ext(bContext *C, const bool do_python) { wmWindowManager *wm = C ? CTX_wm_manager(C) : NULL; - BKE_sound_exit(); - /* first wrap up running stuff, we assume only the active WM is running */ /* modal handlers are on window level freed, others too? */ /* note; same code copied in wm_files.c */ @@ -591,6 +589,10 @@ void WM_exit_ext(bContext *C, const bool do_python) BLI_threadapi_exit(); + /* No need to call this early, rather do it late so that other pieces of Blender using sound may exit cleanly, + * see also T50676. */ + BKE_sound_exit(); + BKE_blender_atexit(); if (MEM_get_memory_blocks_in_use() != 0) { diff --git a/source/blender/windowmanager/intern/wm_stereo.c b/source/blender/windowmanager/intern/wm_stereo.c index 46cee907991..66ebf18c9e1 100644 --- a/source/blender/windowmanager/intern/wm_stereo.c +++ b/source/blender/windowmanager/intern/wm_stereo.c @@ -345,6 +345,32 @@ bool WM_stereo3d_enabled(wmWindow *win, bool skip_stereo3d_check) return true; } +/** + * If needed, this adjusts \a r_mouse_xy so that drawn cursor and handled mouse position are matching visually. 
+*/ +void wm_stereo3d_mouse_offset_apply(wmWindow *win, int *r_mouse_xy) +{ + if (!WM_stereo3d_enabled(win, false)) + return; + + if (win->stereo3d_format->display_mode == S3D_DISPLAY_SIDEBYSIDE) { + const int half_x = win->sizex / 2; + /* right half of the screen */ + if (r_mouse_xy[0] > half_x) { + r_mouse_xy[0] -= half_x; + } + r_mouse_xy[0] *= 2; + } + else if (win->stereo3d_format->display_mode == S3D_DISPLAY_TOPBOTTOM) { + const int half_y = win->sizey / 2; + /* upper half of the screen */ + if (r_mouse_xy[1] > half_y) { + r_mouse_xy[1] -= half_y; + } + r_mouse_xy[1] *= 2; + } +} + /************************** Stereo 3D operator **********************************/ typedef struct Stereo3dData { Stereo3dFormat stereo3d_format; diff --git a/source/blender/windowmanager/wm.h b/source/blender/windowmanager/wm.h index 2f06ddab1e8..e8485359490 100644 --- a/source/blender/windowmanager/wm.h +++ b/source/blender/windowmanager/wm.h @@ -78,6 +78,7 @@ void wm_autosave_location(char *filepath); /* wm_stereo.c */ void wm_method_draw_stereo3d(const bContext *C, wmWindow *win); +void wm_stereo3d_mouse_offset_apply(wmWindow *win, int *r_mouse_xy); int wm_stereo3d_set_exec(bContext *C, wmOperator *op); int wm_stereo3d_set_invoke(bContext *C, wmOperator *op, const wmEvent *event); void wm_stereo3d_set_draw(bContext *C, wmOperator *op); diff --git a/source/creator/creator_args.c b/source/creator/creator_args.c index 27579e58dba..658a0b2db08 100644 --- a/source/creator/creator_args.c +++ b/source/creator/creator_args.c @@ -946,7 +946,7 @@ static int arg_handle_native_pixels_set(int UNUSED(argc), const char **UNUSED(ar } static const char arg_handle_with_borders_doc[] = -"\n\tForce opening without borders" +"\n\tForce opening with borders" ; static int arg_handle_with_borders(int UNUSED(argc), const char **UNUSED(argv), void *UNUSED(data)) { @@ -1364,7 +1364,7 @@ static int arg_handle_render_frame(int argc, const char **argv, void *data) re = RE_NewRender(scene->id.name); 
BLI_begin_threaded_malloc(); - BKE_reports_init(&reports, RPT_PRINT); + BKE_reports_init(&reports, RPT_STORE); RE_SetReports(re, &reports); for (int i = 0; i < frames_range_len; i++) { @@ -1379,6 +1379,7 @@ static int arg_handle_render_frame(int argc, const char **argv, void *data) } } RE_SetReports(re, NULL); + BKE_reports_clear(&reports); BLI_end_threaded_malloc(); MEM_freeN(frame_range_arr); return 1; @@ -1406,10 +1407,11 @@ static int arg_handle_render_animation(int UNUSED(argc), const char **UNUSED(arg Render *re = RE_NewRender(scene->id.name); ReportList reports; BLI_begin_threaded_malloc(); - BKE_reports_init(&reports, RPT_PRINT); + BKE_reports_init(&reports, RPT_STORE); RE_SetReports(re, &reports); RE_BlenderAnim(re, bmain, scene, NULL, scene->lay, scene->r.sfra, scene->r.efra, scene->r.frame_step); RE_SetReports(re, NULL); + BKE_reports_clear(&reports); BLI_end_threaded_malloc(); } else { diff --git a/source/gameengine/VideoTexture/FilterColor.cpp b/source/gameengine/VideoTexture/FilterColor.cpp index eed84a8580c..15a7e9e4cd1 100644 --- a/source/gameengine/VideoTexture/FilterColor.cpp +++ b/source/gameengine/VideoTexture/FilterColor.cpp @@ -68,7 +68,7 @@ PyTypeObject FilterGrayType = 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ - "Filter for gray scale effect", /* tp_doc */ + "Filter for grayscale effect", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ diff --git a/source/gameengine/VideoTexture/FilterColor.h b/source/gameengine/VideoTexture/FilterColor.h index 350f7270874..d042863d7e8 100644 --- a/source/gameengine/VideoTexture/FilterColor.h +++ b/source/gameengine/VideoTexture/FilterColor.h @@ -36,7 +36,7 @@ #include "FilterBase.h" -/// pixel filter for gray scale +/// pixel filter for grayscale class FilterGray : public FilterBase { public: @@ -53,7 +53,7 @@ protected: // calculate gray value unsigned int gray = (28 * (VT_B(val)) + 151 * (VT_G(val)) + 77 * (VT_R(val))) >> 8; - // return gray scale 
value + // return grayscale value VT_R(val) = gray; VT_G(val) = gray; VT_B(val) = gray; diff --git a/tests/gtests/blenlib/BLI_array_store_test.cc b/tests/gtests/blenlib/BLI_array_store_test.cc index 5af6e639e64..370a4111bae 100644 --- a/tests/gtests/blenlib/BLI_array_store_test.cc +++ b/tests/gtests/blenlib/BLI_array_store_test.cc @@ -36,15 +36,15 @@ static void print_mem_saved(const char *id, const BArrayStore *bs) /* -------------------------------------------------------------------- */ /* Test Chunks (building data from list of chunks) */ -typedef struct TestChunnk { - struct TestChunnk *next, *prev; +typedef struct TestChunk { + struct TestChunk *next, *prev; const void *data; size_t data_len; -} TestChunnk; +} TestChunk; -static TestChunnk *testchunk_list_add(ListBase *lb, const void *data, size_t data_len) +static TestChunk *testchunk_list_add(ListBase *lb, const void *data, size_t data_len) { - TestChunnk *tc = (TestChunnk *)MEM_mallocN(sizeof(*tc), __func__); + TestChunk *tc = (TestChunk *)MEM_mallocN(sizeof(*tc), __func__); tc->data = data; tc->data_len = data_len; BLI_addtail(lb, tc); @@ -53,7 +53,7 @@ static TestChunnk *testchunk_list_add(ListBase *lb, const void *data, size_t dat } #if 0 -static TestChunnk *testchunk_list_add_copydata(ListBase *lb, const void *data, size_t data_len) +static TestChunk *testchunk_list_add_copydata(ListBase *lb, const void *data, size_t data_len) { void *data_copy = MEM_mallocN(data_len, __func__); memcpy(data_copy, data, data_len); @@ -63,7 +63,7 @@ static TestChunnk *testchunk_list_add_copydata(ListBase *lb, const void *data, s static void testchunk_list_free(ListBase *lb) { - for (TestChunnk *tc = (TestChunnk *)lb->first, *tb_next; tc; tc = tb_next) { + for (TestChunk *tc = (TestChunk *)lb->first, *tb_next; tc; tc = tb_next) { tb_next = tc->next; MEM_freeN((void *)tc->data); MEM_freeN(tc); @@ -77,12 +77,12 @@ static char *testchunk_as_data( size_t *r_data_len) { size_t data_len = 0; - for (TestChunnk *tc = (TestChunnk 
*)lb->first; tc; tc = tc->next) { + for (TestChunk *tc = (TestChunk *)lb->first; tc; tc = tc->next) { data_len += tc->data_len; } char *data = (char *)MEM_mallocN(data_len, __func__); size_t i = 0; - for (TestChunnk *tc = (TestChunnk *)lb->first; tc; tc = tc->next) { + for (TestChunk *tc = (TestChunk *)lb->first; tc; tc = tc->next) { memcpy(&data[i], tc->data, tc->data_len); data_len += tc->data_len; i += tc->data_len; @@ -95,7 +95,7 @@ static char *testchunk_as_data( #endif static char *testchunk_as_data_array( - TestChunnk **tc_array, int tc_array_len, + TestChunk **tc_array, int tc_array_len, size_t *r_data_len) { size_t data_len = 0; @@ -105,7 +105,7 @@ static char *testchunk_as_data_array( char *data = (char *)MEM_mallocN(data_len, __func__); size_t i = 0; for (int tc_index = 0; tc_index < tc_array_len; tc_index++) { - TestChunnk *tc = tc_array[tc_index]; + TestChunk *tc = tc_array[tc_index]; memcpy(&data[i], tc->data, tc->data_len); i += tc->data_len; } @@ -677,9 +677,9 @@ static void random_chunk_mutate_helper( ListBase random_chunks; BLI_listbase_clear(&random_chunks); random_chunk_generate(&random_chunks, chunks_per_buffer, stride, chunk_count, random_seed); - TestChunnk **chunks_array = (TestChunnk **)MEM_mallocN(chunks_per_buffer * sizeof(TestChunnk *), __func__); + TestChunk **chunks_array = (TestChunk **)MEM_mallocN(chunks_per_buffer * sizeof(TestChunk *), __func__); { - TestChunnk *tc = (TestChunnk *)random_chunks.first; + TestChunk *tc = (TestChunk *)random_chunks.first; for (int i = 0; i < chunks_per_buffer; i++, tc = tc->next) { chunks_array[i] = tc; } @@ -692,7 +692,7 @@ static void random_chunk_mutate_helper( { RNG *rng = BLI_rng_new(random_seed); for (int i = 0; i < items_total; i++) { - BLI_rng_shuffle_array(rng, chunks_array, sizeof(TestChunnk *), chunks_per_buffer); + BLI_rng_shuffle_array(rng, chunks_array, sizeof(TestChunk *), chunks_per_buffer); size_t data_len; char *data = testchunk_as_data_array(chunks_array, chunks_per_buffer, 
&data_len); BLI_assert(data_len == chunks_per_buffer * chunk_count * stride); diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt index 935a2a807fe..b76c47fcf25 100644 --- a/tests/python/CMakeLists.txt +++ b/tests/python/CMakeLists.txt @@ -95,6 +95,11 @@ add_test(bevel ${TEST_BLENDER_EXE} --python-text run_tests ) +add_test(split_faces ${TEST_BLENDER_EXE} + ${TEST_SRC_DIR}/modeling/split_faces_test.blend + --python-text run_tests +) + # ------------------------------------------------------------------------------ # IO TESTS @@ -421,6 +426,8 @@ if(WITH_CYCLES) if(WITH_OPENGL_TESTS) add_cycles_render_test(opengl) endif() + add_cycles_render_test(image) + add_cycles_render_test(mblur) add_cycles_render_test(reports) add_cycles_render_test(render) add_cycles_render_test(shader) |