diff options
Diffstat (limited to 'intern/cycles')
355 files changed, 14704 insertions, 12189 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 6f6bd7ec2cc..121c8bdad6e 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -177,14 +177,11 @@ if(CXX_HAS_AVX2) add_definitions(-DWITH_KERNEL_AVX2) endif() -if(WITH_CYCLES_OSL) - # LLVM and OSL need to build without RTTI - if(WIN32 AND MSVC) - set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") - elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) - set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") - endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") +# LLVM and OSL need to build without RTTI +if(WIN32 AND MSVC) + set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") +elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) + set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") endif() # Definitions and Includes @@ -228,11 +225,8 @@ if(WITH_CYCLES_DEVICE_OPTIX) SYSTEM ${OPTIX_INCLUDE_DIR} ) - - # Need pre-compiled CUDA binaries in the OptiX device - set(WITH_CYCLES_CUDA_BINARIES ON) else() - message(STATUS "Optix not found, disabling it from Cycles") + message(STATUS "OptiX not found, disabling it from Cycles") set(WITH_CYCLES_DEVICE_OPTIX OFF) endif() endif() @@ -319,9 +313,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) set(MAX_MSVC 1910) elseif(${CUDA_VERSION} EQUAL "9.1") set(MAX_MSVC 1911) - elseif(${CUDA_VERSION} EQUAL "10.0") - set(MAX_MSVC 1999) - elseif(${CUDA_VERSION} EQUAL "10.1") + elseif(${CUDA_VERSION} LESS "11.0") set(MAX_MSVC 1999) endif() if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang") @@ -338,7 +330,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) endif() # NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC. 
-if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER) +if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE) if(NOT (${CUDA_VERSION} VERSION_LESS 10.0)) message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.") set(WITH_CYCLES_CUBIN_COMPILER OFF) @@ -356,17 +348,6 @@ if(WITH_CYCLES_NETWORK) add_definitions(-DWITH_NETWORK) endif() -if(WITH_OPENCOLORIO) - add_definitions(-DWITH_OCIO) - include_directories( - SYSTEM - ${OPENCOLORIO_INCLUDE_DIRS} - ) - if(WIN32) - add_definitions(-DOpenColorIO_STATIC) - endif() -endif() - if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER) add_subdirectory(app) endif() diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index d67a72ab7db..ef374f91a65 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -51,14 +51,17 @@ endif() # Common configuration. -link_directories(${OPENIMAGEIO_LIBPATH} - ${BOOST_LIBPATH} - ${PNG_LIBPATH} - ${JPEG_LIBPATH} - ${ZLIB_LIBPATH} - ${TIFF_LIBPATH} - ${OPENEXR_LIBPATH} - ${OPENJPEG_LIBPATH}) +link_directories( + ${OPENIMAGEIO_LIBPATH} + ${BOOST_LIBPATH} + ${PNG_LIBPATH} + ${JPEG_LIBPATH} + ${ZLIB_LIBPATH} + ${TIFF_LIBPATH} + ${OPENEXR_LIBPATH} + ${OPENJPEG_LIBPATH} + ${OPENVDB_LIBPATH} +) if(WITH_OPENCOLORIO) link_directories(${OPENCOLORIO_LIBPATH}) diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp index 774c18f4219..7631cb9bed5 100644 --- a/intern/cycles/app/cycles_cubin_cc.cpp +++ b/intern/cycles/app/cycles_cubin_cc.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include <stdio.h> #include <stdint.h> +#include <stdio.h> #include <string> #include <vector> @@ -43,7 +43,8 @@ template<typename T> std::string to_string(const T &n) class CompilationSettings { public: - CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false) + CompilationSettings() + : target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false) { } @@ -57,12 +58,13 @@ class CompilationSettings { int bits; bool verbose; bool fast_math; + bool ptx_only; }; static bool compile_cuda(CompilationSettings &settings) { - const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"}; - const char *header_content[] = {"\n", "\n", "\n", "\n"}; + const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"}; + const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"}; printf("Building %s\n", settings.input_file.c_str()); @@ -83,6 +85,8 @@ static bool compile_cuda(CompilationSettings &settings) options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion())); options.push_back("-arch=compute_" + std::to_string(settings.target_arch)); options.push_back("--device-as-default-execution-space"); + options.push_back("-DCYCLES_CUBIN_CC"); + options.push_back("--std=c++11"); if (settings.fast_math) options.push_back("--use_fast_math"); @@ -134,10 +138,14 @@ static bool compile_cuda(CompilationSettings &settings) fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result); return false; } - - /* Write a file in the temp folder with the ptx code. */ - settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + - OIIO::Filesystem::unique_path(); + if (settings.ptx_only) { + settings.ptx_file = settings.output_file; + } + else { + /* Write a file in the temp folder with the ptx code. 
*/ + settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + + OIIO::Filesystem::unique_path(); + } FILE *f = fopen(settings.ptx_file.c_str(), "wb"); fwrite(&ptx_code[0], 1, ptx_size, f); fclose(f); @@ -249,6 +257,9 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s "-D %L", &settings.defines, "Add additional defines", + "-ptx", + &settings.ptx_only, + "emit PTX code", "-v", &settings.verbose, "Use verbose logging", @@ -303,8 +314,10 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } - if (!link_ptxas(settings)) { - exit(EXIT_FAILURE); + if (!settings.ptx_only) { + if (!link_ptxas(settings)) { + exit(EXIT_FAILURE); + } } return 0; diff --git a/intern/cycles/app/cycles_server.cpp b/intern/cycles/app/cycles_server.cpp index c5a4c9b375b..1ad70a376ed 100644 --- a/intern/cycles/app/cycles_server.cpp +++ b/intern/cycles/app/cycles_server.cpp @@ -20,11 +20,11 @@ #include "util/util_args.h" #include "util/util_foreach.h" +#include "util/util_logging.h" #include "util/util_path.h" #include "util/util_stats.h" #include "util/util_string.h" #include "util/util_task.h" -#include "util/util_logging.h" using namespace ccl; diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index d2d112e8d7e..e45c37be494 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -16,12 +16,12 @@ #include <stdio.h> +#include "device/device.h" #include "render/buffers.h" #include "render/camera.h" -#include "device/device.h" +#include "render/integrator.h" #include "render/scene.h" #include "render/session.h" -#include "render/integrator.h" #include "util/util_args.h" #include "util/util_foreach.h" diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 1dbe8a30ff2..aec00f845f3 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -16,9 +16,9 @@ #include <stdio.h> -#include <sstream> #include 
<algorithm> #include <iterator> +#include <sstream> #include "graph/node_xml.h" @@ -32,8 +32,8 @@ #include "render/nodes.h" #include "render/object.h" #include "render/osl.h" -#include "render/shader.h" #include "render/scene.h" +#include "render/shader.h" #include "subd/subd_patch.h" #include "subd/subd_split.h" @@ -292,7 +292,7 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node filepath = path_join(state.base, filepath); } - snode = ((OSLShaderManager *)manager)->osl_node(filepath); + snode = OSLShaderManager::osl_node(manager, filepath); if (!snode) { fprintf(stderr, "Failed to create OSL node from \"%s\".\n", filepath.c_str()); @@ -326,6 +326,10 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node fprintf(stderr, "Node type \"%s\" is not a shader node.\n", node_type->name.c_str()); continue; } + else if (node_type->create == NULL) { + fprintf(stderr, "Can't create abstract node type \"%s\".\n", node_type->name.c_str()); + continue; + } snode = (ShaderNode *)node_type->create(node_type); } @@ -376,11 +380,11 @@ static Mesh *xml_add_mesh(Scene *scene, const Transform &tfm) { /* create mesh */ Mesh *mesh = new Mesh(); - scene->meshes.push_back(mesh); + scene->geometry.push_back(mesh); /* create object*/ Object *object = new Object(); - object->mesh = mesh; + object->geometry = mesh; object->tfm = tfm; scene->objects.push_back(object); diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index 0888eeb78bb..496e8e9310b 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -18,6 +18,9 @@ set(INC_SYS set(SRC blender_camera.cpp blender_device.cpp + blender_image.cpp + blender_geometry.cpp + blender_light.cpp blender_mesh.cpp blender_object.cpp blender_object_cull.cpp @@ -30,9 +33,12 @@ set(SRC blender_sync.cpp blender_texture.cpp blender_viewport.cpp + blender_volume.cpp CCL_api.h blender_device.h + blender_id_map.h + 
blender_image.h blender_object_cull.h blender_sync.h blender_session.h @@ -86,6 +92,20 @@ if(WITH_MOD_FLUID) add_definitions(-DWITH_FLUID) endif() +if(WITH_NEW_OBJECT_TYPES) + add_definitions(-DWITH_NEW_OBJECT_TYPES) +endif() + +if(WITH_OPENVDB) + add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS}) + list(APPEND INC_SYS + ${OPENVDB_INCLUDE_DIRS} + ) + list(APPEND LIB + ${OPENVDB_LIBRARIES} + ) +endif() + blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") # avoid link failure with clang 3.4 debug diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py index 6d6f89603fe..3d2a52d0cf6 100644 --- a/intern/cycles/blender/addon/__init__.py +++ b/intern/cycles/blender/addon/__init__.py @@ -22,7 +22,7 @@ bl_info = { "blender": (2, 80, 0), "description": "Cycles renderer integration", "warning": "", - "wiki_url": "https://docs.blender.org/manual/en/latest/render/cycles/", + "doc_url": "https://docs.blender.org/manual/en/latest/render/cycles/", "tracker_url": "", "support": 'OFFICIAL', "category": "Render"} diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index ee7ac7737c0..2b872bb5c39 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -33,7 +33,7 @@ def _is_using_buggy_driver(): # in the version string, but those cards do not quite work and # causing crashes. 
return True - regex = re.compile(".*Compatibility Profile Context ([0-9]+(\.[0-9]+)+)$") + regex = re.compile(".*Compatibility Profile Context ([0-9]+(\\.[0-9]+)+)$") if not regex.match(version): # Skip cards like FireGL return False @@ -245,9 +245,6 @@ def list_render_passes(srl): if srl.use_pass_transmission_direct: yield ("TransDir", "RGB", 'COLOR') if srl.use_pass_transmission_indirect: yield ("TransInd", "RGB", 'COLOR') if srl.use_pass_transmission_color: yield ("TransCol", "RGB", 'COLOR') - if srl.use_pass_subsurface_direct: yield ("SubsurfaceDir", "RGB", 'COLOR') - if srl.use_pass_subsurface_indirect: yield ("SubsurfaceInd", "RGB", 'COLOR') - if srl.use_pass_subsurface_color: yield ("SubsurfaceCol", "RGB", 'COLOR') if srl.use_pass_emit: yield ("Emit", "RGB", 'COLOR') if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR') @@ -258,6 +255,7 @@ def list_render_passes(srl): if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE') if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE') if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE') + if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE') if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR') if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR') @@ -284,8 +282,7 @@ def list_render_passes(srl): yield ("Denoising Intensity", "X", 'VALUE') clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect", "denoising_glossy_direct", "denoising_glossy_indirect", - "denoising_transmission_direct", "denoising_transmission_indirect", - "denoising_subsurface_direct", "denoising_subsurface_indirect") + "denoising_transmission_direct", "denoising_transmission_indirect") if any(getattr(crl, option) for option in clean_options): yield ("Denoising Clean", "RGB", 'COLOR') diff --git a/intern/cycles/blender/addon/operators.py b/intern/cycles/blender/addon/operators.py index 
80bb663330b..3c8e79eaba5 100644 --- a/intern/cycles/blender/addon/operators.py +++ b/intern/cycles/blender/addon/operators.py @@ -153,12 +153,12 @@ class CYCLES_OT_denoise_animation(Operator): self.report({'ERROR'}, str(e)) return {'FINISHED'} - self.report({'INFO'}, "Denoising completed.") + self.report({'INFO'}, "Denoising completed") return {'FINISHED'} class CYCLES_OT_merge_images(Operator): - "Combine OpenEXR multilayer images rendered with different sample" \ + "Combine OpenEXR multilayer images rendered with different sample " \ "ranges into one image with reduced noise" bl_idname = "cycles.merge_images" bl_label = "Merge Images" diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 5f163c2510b..c91e210bbd8 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -112,6 +112,7 @@ enum_use_layer_samples = ( enum_sampling_pattern = ( ('SOBOL', "Sobol", "Use Sobol random sampling pattern"), ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"), + ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"), ) enum_integrator = ( @@ -178,10 +179,6 @@ enum_view3d_shading_render_pass= ( ('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass", 45), ('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass", 46), - ('SUBSURFACE_DIRECT', "Subsurface Direct", "Show the Subsurface Direct render pass", 47), - ('SUBSURFACE_INDIRECT', "Subsurface Indirect", "Show the Subsurface Indirect render pass", 48), - ('SUBSURFACE_COLOR', "Subsurface Color", "Show the Subsurface Color render pass", 49), - ('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass", 50), ('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass", 51), @@ -197,7 +194,12 @@ enum_aov_types = ( ('COLOR', 
"Color", "Write a Color pass", 1), ) -enum_denoising_optix_input_passes= ( +enum_viewport_denoising = ( + ('NONE', "None", "Disable viewport denoising", 0), + ('OPTIX', "OptiX AI-Accelerated", "Use the OptiX denoiser running on the GPU (requires at least one compatible OptiX device)", 1), +) + +enum_denoising_optix_input_passes = ( ('RGB', "Color", "Use only color as input", 1), ('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2), ('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3), @@ -229,6 +231,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default='PATH', ) + preview_pause: BoolProperty( + name="Pause Preview", + description="Pause all viewport preview renders", + default=False, + ) + preview_denoising: EnumProperty( + name="Viewport Denoising", + description="Denoise the image after each preview update with the selected denoiser engine", + items=enum_viewport_denoising, + default='NONE', + ) + use_square_samples: BoolProperty( name="Square Samples", description="Square sampling values for easier artist control", @@ -247,11 +261,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=0, max=(1 << 24), default=32, ) - preview_pause: BoolProperty( - name="Pause Preview", - description="Pause all viewport preview renders", - default=False, - ) aa_samples: IntProperty( name="AA Samples", description="Number of antialiasing samples to render for each pixel", @@ -264,6 +273,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=0, max=2097151, default=32, ) + diffuse_samples: IntProperty( name="Diffuse Samples", description="Number of diffuse bounce samples to render for each AA sample", @@ -294,14 +304,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): min=1, max=1024, default=1, ) - subsurface_samples: IntProperty( name="Subsurface Samples", description="Number of subsurface scattering samples to render for each AA sample", min=1, max=1024, default=1, ) - 
volume_samples: IntProperty( name="Volume Samples", description="Number of volume scattering samples to render for each AA sample", @@ -342,6 +350,26 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=0.01, ) + use_adaptive_sampling: BoolProperty( + name="Use Adaptive Sampling", + description="Automatically reduce the number of samples per pixel based on estimated noise level", + default=False, + ) + + adaptive_threshold: FloatProperty( + name="Adaptive Sampling Threshold", + description="Noise level step to stop sampling at, lower values reduce noise the cost of render time. Zero for automatic setting based on number of AA samples", + min=0.0, max=1.0, + default=0.0, + precision=4, + ) + adaptive_min_samples: IntProperty( + name="Adaptive Min Samples", + description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on number of AA samples", + min=0, max=4096, + default=0, + ) + min_light_bounces: IntProperty( name="Min Light Bounces", description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, " @@ -416,13 +444,20 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=8, ) - volume_step_size: FloatProperty( - name="Step Size", - description="Distance between volume shader samples when rendering the volume " - "(lower values give more accurate and detailed results, but also increased render time)", - default=0.1, - min=0.0000001, max=100000.0, soft_min=0.01, soft_max=1.0, precision=4, - unit='LENGTH' + volume_step_rate: FloatProperty( + name="Step Rate", + description="Globally adjust detail for volume rendering, on top of automatically estimated step size. 
" + "Higher values reduce render time, lower values render with more detail", + default=1.0, + min=0.01, max=100.0, soft_min=0.1, soft_max=10.0, precision=2 + ) + + volume_preview_step_rate: FloatProperty( + name="Step Rate", + description="Globally adjust detail for volume rendering, on top of automatically estimated step size. " + "Higher values reduce render time, lower values render with more detail", + default=1.0, + min=0.01, max=100.0, soft_min=0.1, soft_max=10.0, precision=2 ) volume_max_steps: IntProperty( @@ -562,6 +597,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=64, subtype='PIXEL' ) + preview_denoising_start_sample: IntProperty( + name="Start Denoising", + description="Sample to start denoising the preview at", + min=0, max=(1 << 24), + default=1, + ) debug_reset_timeout: FloatProperty( name="Reset timeout", @@ -641,7 +682,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): ('DIFFUSE', "Diffuse", ""), ('GLOSSY', "Glossy", ""), ('TRANSMISSION', "Transmission", ""), - ('SUBSURFACE', "Subsurface", ""), ), ) @@ -901,6 +941,14 @@ class CyclesMaterialSettings(bpy.types.PropertyGroup): default='LINEAR', ) + volume_step_rate: FloatProperty( + name="Step Rate", + description="Scale the distance between volume shader samples when rendering the volume " + "(lower values give more accurate and detailed results, but also increased render time)", + default=1.0, + min=0.001, max=1000.0, soft_min=0.1, soft_max=10.0, precision=4 + ) + displacement_method: EnumProperty( name="Displacement Method", description="Method to use for the displacement", @@ -1011,6 +1059,13 @@ class CyclesWorldSettings(bpy.types.PropertyGroup): items=enum_volume_interpolation, default='LINEAR', ) + volume_step_size: FloatProperty( + name="Step Size", + description="Distance between volume shader samples when rendering the volume " + "(lower values give more accurate and detailed results, but also increased render time)", + default=1.0, + min=0.0000001, max=100000.0, 
soft_min=0.1, soft_max=100.0, precision=4 + ) @classmethod def register(cls): @@ -1121,7 +1176,7 @@ class CyclesObjectSettings(bpy.types.PropertyGroup): motion_steps: IntProperty( name="Motion Steps", description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))", - min=1, soft_max=8, + min=1, max=7, default=1, ) @@ -1285,7 +1340,12 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) - + pass_debug_sample_count: BoolProperty( + name="Debug Sample Count", + description="Number of samples/camera rays per pixel", + default=False, + update=update_render_passes, + ) use_pass_volume_direct: BoolProperty( name="Volume Direct", description="Deliver direct volumetric scattering pass", @@ -1305,12 +1365,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) - use_optix_denoising: BoolProperty( - name="Use OptiX AI Denoising", - description="Denoise the rendered image with the OptiX AI denoiser", - default=False, - update=update_render_passes, - ) denoising_diffuse_direct: BoolProperty( name="Diffuse Direct", description="Denoise the direct diffuse lighting", @@ -1341,16 +1395,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): description="Denoise the indirect transmission lighting", default=True, ) - denoising_subsurface_direct: BoolProperty( - name="Subsurface Direct", - description="Denoise the direct subsurface lighting", - default=True, - ) - denoising_subsurface_indirect: BoolProperty( - name="Subsurface Indirect", - description="Denoise the indirect subsurface lighting", - default=True, - ) denoising_strength: FloatProperty( name="Denoising Strength", description="Controls neighbor pixel weighting for the denoising filter (lower values preserve more detail, but aren't as smooth)", @@ -1387,11 +1431,18 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): min=0, max=7, default=0, ) + 
+ use_optix_denoising: BoolProperty( + name="OptiX AI-Accelerated", + description="Use the OptiX denoiser to denoise the rendered image", + default=False, + update=update_render_passes, + ) denoising_optix_input_passes: EnumProperty( name="Input Passes", - description="Controls which passes the OptiX AI denoiser should use as input, which can have different effects on the denoised image", + description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)", items=enum_denoising_optix_input_passes, - default='RGB', + default='RGB_ALBEDO', ) use_pass_crypto_object: BoolProperty( diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 35d5d3801d2..37675c5699d 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -112,6 +112,10 @@ def show_device_active(context): return True return context.preferences.addons[__package__].preferences.has_active_device() +def show_optix_denoising(context): + # OptiX AI denoiser can be used when at least one device supports OptiX + return bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX')) + def draw_samples_info(layout, context): cscene = context.scene.cycles @@ -177,17 +181,23 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel): if not use_optix(context): layout.prop(cscene, "progressive") - if cscene.progressive == 'PATH' or use_branched_path(context) is False: + if not use_branched_path(context): col = layout.column(align=True) col.prop(cscene, "samples", text="Render") col.prop(cscene, "preview_samples", text="Viewport") - - draw_samples_info(layout, context) else: col = layout.column(align=True) col.prop(cscene, "aa_samples", text="Render") col.prop(cscene, "preview_aa_samples", text="Viewport") + # Viewport denoising is currently only supported with OptiX + if show_optix_denoising(context): + col = layout.column() + col.prop(cscene, "preview_denoising") + + if not 
use_branched_path(context): + draw_samples_info(layout, context) + class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel): bl_label = "Sub Samples" @@ -195,9 +205,7 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel): @classmethod def poll(cls, context): - scene = context.scene - cscene = scene.cycles - return cscene.progressive != 'PATH' and use_branched_path(context) + return use_branched_path(context) def draw(self, context): layout = self.layout @@ -222,6 +230,32 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel): draw_samples_info(layout, context) +class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel): + bl_label = "Adaptive Sampling" + bl_parent_id = "CYCLES_RENDER_PT_sampling" + bl_options = {'DEFAULT_CLOSED'} + + def draw_header(self, context): + layout = self.layout + scene = context.scene + cscene = scene.cycles + + layout.prop(cscene, "use_adaptive_sampling", text="") + + def draw(self, context): + layout = self.layout + layout.use_property_split = True + layout.use_property_decorate = False + + scene = context.scene + cscene = scene.cycles + + layout.active = cscene.use_adaptive_sampling + + col = layout.column(align=True) + col.prop(cscene, "adaptive_min_samples", text="Min Samples") + col.prop(cscene, "adaptive_threshold", text="Noise Threshold") + class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel): bl_label = "Advanced" bl_parent_id = "CYCLES_RENDER_PT_sampling" @@ -239,7 +273,9 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel): row.prop(cscene, "seed") row.prop(cscene, "use_animated_seed", text="", icon='TIME') - layout.prop(cscene, "sampling_pattern", text="Pattern") + col = layout.column(align=True) + col.active = not(cscene.use_adaptive_sampling) + col.prop(cscene, "sampling_pattern", text="Pattern") layout.prop(cscene, "use_square_samples") @@ -337,7 +373,7 @@ class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel): col = 
layout.column() sub = col.column(align=True) sub.prop(cscene, "dicing_rate", text="Dicing Rate Render") - sub.prop(cscene, "preview_dicing_rate", text="Preview") + sub.prop(cscene, "preview_dicing_rate", text="Viewport") col.separator() @@ -392,9 +428,11 @@ class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel): scene = context.scene cscene = scene.cycles - col = layout.column() - col.prop(cscene, "volume_step_size", text="Step Size") - col.prop(cscene, "volume_max_steps", text="Max Steps") + col = layout.column(align=True) + col.prop(cscene, "volume_step_rate", text="Step Rate Render") + col.prop(cscene, "volume_preview_step_rate", text="Viewport") + + layout.prop(cscene, "volume_max_steps", text="Max Steps") class CYCLES_RENDER_PT_light_paths(CyclesButtonsPanel, Panel): @@ -635,9 +673,6 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel): sub = col.column() sub.active = not rd.use_save_buffers - for view_layer in scene.view_layers: - if view_layer.cycles.use_denoising: - sub.active = False sub.prop(cscene, "use_progressive_refine") @@ -705,6 +740,11 @@ class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel): col.prop(rd, "preview_pixel_size", text="Pixel Size") col.prop(cscene, "preview_start_resolution", text="Start Pixels") + if show_optix_denoising(context): + sub = col.row(align=True) + sub.active = cscene.preview_denoising != 'NONE' + sub.prop(cscene, "preview_denoising_start_sample", text="Denoising Start Sample") + class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel): bl_label = "Filter" @@ -732,6 +772,8 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel): col.prop(view_layer, "use_solid", text="Surfaces") col = flow.column() col.prop(view_layer, "use_strand", text="Hair") + col = flow.column() + col.prop(view_layer, "use_volumes", text="Volumes") if with_freestyle: col = flow.column() col.prop(view_layer, "use_freestyle", text="Freestyle") @@ -803,6 +845,8 @@ class 
CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel): col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data") col = flow.column() col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time") + col = flow.column() + col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count") layout.separator() @@ -848,14 +892,6 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel): split = layout.split(factor=0.35) split.use_property_split = False - split.label(text="Subsurface") - row = split.row(align=True) - row.prop(view_layer, "use_pass_subsurface_direct", text="Direct", toggle=True) - row.prop(view_layer, "use_pass_subsurface_indirect", text="Indirect", toggle=True) - row.prop(view_layer, "use_pass_subsurface_color", text="Color", toggle=True) - - split = layout.split(factor=0.35) - split.use_property_split = False split.label(text="Volume") row = split.row(align=True) row.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct", toggle=True) @@ -981,15 +1017,14 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel): col = split.column(align=True) - if use_optix(context): - col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising") + if show_optix_denoising(context): + col.prop(cycles_view_layer, "use_optix_denoising") + col.separator(factor=2.0) if cycles_view_layer.use_optix_denoising: col.prop(cycles_view_layer, "denoising_optix_input_passes") return - col.separator(factor=2.0) - col.prop(cycles_view_layer, "denoising_radius", text="Radius") col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength") col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength") @@ -1036,15 +1071,6 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel): split = layout.split(factor=0.5) split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes - col = split.column() - col.alignment = 'RIGHT' - 
col.label(text="Subsurface") - - row = split.row(align=True) - row.use_property_split = False - row.prop(cycles_view_layer, "denoising_subsurface_direct", text="Direct", toggle=True) - row.prop(cycles_view_layer, "denoising_subsurface_indirect", text="Indirect", toggle=True) - class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel): bl_label = "Post Processing" @@ -1391,8 +1417,6 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel): light = context.light clamp = light.cycles - layout.use_property_decorate = False - if self.bl_space_type == 'PROPERTIES': layout.row().prop(light, "type", expand=True) layout.use_property_split = True @@ -1674,6 +1698,9 @@ class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel): sub.prop(cworld, "volume_sampling", text="Sampling") col.prop(cworld, "volume_interpolation", text="Interpolation") col.prop(cworld, "homogeneous_volume", text="Homogeneous") + sub = col.column() + sub.active = not cworld.homogeneous_volume + sub.prop(cworld, "volume_step_size") class CYCLES_MATERIAL_PT_preview(CyclesButtonsPanel, Panel): @@ -1805,6 +1832,9 @@ class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel): sub.prop(cmat, "volume_sampling", text="Sampling") col.prop(cmat, "volume_interpolation", text="Interpolation") col.prop(cmat, "homogeneous_volume", text="Homogeneous") + sub = col.column() + sub.active = not cmat.homogeneous_volume + sub.prop(cmat, "volume_step_rate") def draw(self, context): self.draw_shared(self, context, context.material) @@ -1852,7 +1882,7 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel): cscene = scene.cycles rd = scene.render if rd.use_bake_multires == False and cscene.bake_type in { - 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}: + 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}: return True def draw(self, context): @@ -1887,11 +1917,10 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel): flow.prop(cbk, "use_pass_diffuse") 
flow.prop(cbk, "use_pass_glossy") flow.prop(cbk, "use_pass_transmission") - flow.prop(cbk, "use_pass_subsurface") flow.prop(cbk, "use_pass_ambient_occlusion") flow.prop(cbk, "use_pass_emit") - elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}: + elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}: row = col.row(align=True) row.use_property_split = False row.prop(cbk, "use_pass_direct", toggle=True) @@ -2192,8 +2221,6 @@ def draw_device(self, context): col = layout.column() col.prop(cscene, "feature_set") - scene = context.scene - col = layout.column() col.active = show_device_active(context) col.prop(cscene, "device") @@ -2248,6 +2275,7 @@ classes = ( CYCLES_PT_integrator_presets, CYCLES_RENDER_PT_sampling, CYCLES_RENDER_PT_sampling_sub_samples, + CYCLES_RENDER_PT_sampling_adaptive, CYCLES_RENDER_PT_sampling_advanced, CYCLES_RENDER_PT_light_paths, CYCLES_RENDER_PT_light_paths_max_bounces, diff --git a/intern/cycles/blender/addon/version_update.py b/intern/cycles/blender/addon/version_update.py index 899245db03e..49f23f4ba30 100644 --- a/intern/cycles/blender/addon/version_update.py +++ b/intern/cycles/blender/addon/version_update.py @@ -42,10 +42,7 @@ def custom_bake_remap(scene): 'GLOSSY_COLOR', 'TRANSMISSION_DIRECT', 'TRANSMISSION_INDIRECT', - 'TRANSMISSION_COLOR', - 'SUBSURFACE_DIRECT', - 'SUBSURFACE_INDIRECT', - 'SUBSURFACE_COLOR') + 'TRANSMISSION_COLOR') diffuse_direct_idx = bake_lookup.index('DIFFUSE_DIRECT') diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index c84d6e1572b..40a1a2c2edc 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -725,22 +725,26 @@ static void blender_camera_view_subset(BL::RenderEngine &b_engine, BoundBox2D cam, view; float view_aspect, cam_aspect, sensor_size; - /* get viewport viewplane */ + /* Get viewport viewplane. 
*/ BlenderCamera view_bcam; blender_camera_init(&view_bcam, b_render); blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true); blender_camera_viewplane(&view_bcam, width, height, &view, &view_aspect, &sensor_size); - /* get camera viewplane */ + /* Get camera viewplane. */ BlenderCamera cam_bcam; blender_camera_init(&cam_bcam, b_render); blender_camera_from_object(&cam_bcam, b_engine, b_ob, true); + /* Camera border is affect by aspect, viewport is not. */ + cam_bcam.pixelaspect.x = b_render.pixel_aspect_x(); + cam_bcam.pixelaspect.y = b_render.pixel_aspect_y(); + blender_camera_viewplane( &cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size); - /* return */ + /* Return */ *view_box = view * (1.0f / view_aspect); *cam_box = cam * (1.0f / cam_aspect); } @@ -863,7 +867,8 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d, } } -BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render, +BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene, + BL::RenderSettings &b_render, BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, @@ -899,7 +904,11 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render, params.height = height; } - update_viewport_display_passes(b_v3d, params.passes); + PassType display_pass = update_viewport_display_passes(b_v3d, params.passes); + + /* Can only denoise the combined image pass */ + params.denoising_data_pass = display_pass == PASS_COMBINED && + update_viewport_display_denoising(b_v3d, b_scene); return params; } diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index 4dba8ffbe0e..0c87808d880 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -17,6 +17,7 @@ #include "render/attribute.h" #include "render/camera.h" #include "render/curves.h" +#include "render/hair.h" #include "render/mesh.h" #include "render/object.h" 
#include "render/scene.h" @@ -107,12 +108,12 @@ static void InterpolateKeySegments( } static bool ObtainCacheParticleData( - Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background) + Geometry *geom, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background) { int curvenum = 0; int keyno = 0; - if (!(mesh && b_mesh && b_ob && CData)) + if (!(geom && b_mesh && b_ob && CData)) return false; Transform tfm = get_transform(b_ob->matrix_world()); @@ -128,7 +129,7 @@ static bool ObtainCacheParticleData( if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && (b_part.type() == BL::ParticleSettings::type_HAIR)) { - int shader = clamp(b_part.material() - 1, 0, mesh->used_shaders.size() - 1); + int shader = clamp(b_part.material() - 1, 0, geom->used_shaders.size() - 1); int display_step = background ? b_part.render_step() : b_part.display_step(); int totparts = b_psys.particles.length(); int totchild = background ? b_psys.child_particles.length() : @@ -173,19 +174,20 @@ static bool ObtainCacheParticleData( CData->curve_firstkey.push_back_slow(keyno); float curve_length = 0.0f; - float3 pcKey; + float3 prev_co_world = make_float3(0.0f, 0.0f, 0.0f); + float3 prev_co_object = make_float3(0.0f, 0.0f, 0.0f); for (int step_no = 0; step_no < ren_step; step_no++) { - float nco[3]; - b_psys.co_hair(*b_ob, pa_no, step_no, nco); - float3 cKey = make_float3(nco[0], nco[1], nco[2]); - cKey = transform_point(&itfm, cKey); + float3 co_world = prev_co_world; + b_psys.co_hair(*b_ob, pa_no, step_no, &co_world.x); + float3 co_object = transform_point(&itfm, co_world); if (step_no > 0) { - const float step_length = len(cKey - pcKey); + const float step_length = len(co_object - prev_co_object); curve_length += step_length; } - CData->curvekey_co.push_back_slow(cKey); + CData->curvekey_co.push_back_slow(co_object); CData->curvekey_time.push_back_slow(curve_length); - pcKey = cKey; + prev_co_object = co_object; + prev_co_world = 
co_world; keynum++; } keyno += keynum; @@ -201,14 +203,14 @@ static bool ObtainCacheParticleData( return true; } -static bool ObtainCacheParticleUV(Mesh *mesh, +static bool ObtainCacheParticleUV(Geometry *geom, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int uv_num) { - if (!(mesh && b_mesh && b_ob && CData)) + if (!(geom && b_mesh && b_ob && CData)) return false; CData->curve_uv.clear(); @@ -264,14 +266,14 @@ static bool ObtainCacheParticleUV(Mesh *mesh, return true; } -static bool ObtainCacheParticleVcol(Mesh *mesh, +static bool ObtainCacheParticleVcol(Geometry *geom, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background, int vcol_num) { - if (!(mesh && b_mesh && b_ob && CData)) + if (!(geom && b_mesh && b_ob && CData)) return false; CData->curve_vcol.clear(); @@ -593,21 +595,21 @@ static void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, in /* texture coords still needed */ } -static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData) +static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CData) { int num_keys = 0; int num_curves = 0; - if (mesh->num_curves()) + if (hair->num_curves()) return; Attribute *attr_intercept = NULL; Attribute *attr_random = NULL; - if (mesh->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) - attr_intercept = mesh->curve_attributes.add(ATTR_STD_CURVE_INTERCEPT); - if (mesh->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) - attr_random = mesh->curve_attributes.add(ATTR_STD_CURVE_RANDOM); + if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) + attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT); + if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) + attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM); /* compute and reserve size of arrays */ for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { @@ -620,10 +622,10 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, 
ParticleCurveData *CDa } if (num_curves > 0) { - VLOG(1) << "Exporting curve segments for mesh " << mesh->name; + VLOG(1) << "Exporting curve segments for mesh " << hair->name; } - mesh->reserve_curves(mesh->num_curves() + num_curves, mesh->curve_keys.size() + num_keys); + hair->reserve_curves(hair->num_curves() + num_curves, hair->curve_keys.size() + num_keys); num_keys = 0; num_curves = 0; @@ -648,7 +650,7 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) { radius = 0.0f; } - mesh->add_curve_key(ickey_loc, radius); + hair->add_curve_key(ickey_loc, radius); if (attr_intercept) attr_intercept->add(time); @@ -659,16 +661,16 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa attr_random->add(hash_uint2_to_float(num_curves, 0)); } - mesh->add_curve(num_keys, CData->psys_shader[sys]); + hair->add_curve(num_keys, CData->psys_shader[sys]); num_keys += num_curve_keys; num_curves++; } } /* check allocation */ - if ((mesh->curve_keys.size() != num_keys) || (mesh->num_curves() != num_curves)) { + if ((hair->curve_keys.size() != num_keys) || (hair->num_curves() != num_curves)) { VLOG(1) << "Allocation failed, clearing data"; - mesh->clear(); + hair->clear(); } } @@ -712,24 +714,58 @@ static float4 LerpCurveSegmentMotionCV(ParticleCurveData *CData, int sys, int cu return lerp(mP, mP2, remainder); } -static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int motion_step) +static void export_hair_motion_validate_attribute(Hair *hair, + int motion_step, + int num_motion_keys, + bool have_motion) { - VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name << ", motion step " + Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + const int num_keys = hair->curve_keys.size(); + + if (num_motion_keys != num_keys || !have_motion) { + /* No motion or hair "topology" changed, remove 
attributes again. */ + if (num_motion_keys != num_keys) { + VLOG(1) << "Hair topology changed, removing attribute."; + } + else { + VLOG(1) << "No motion, removing attribute."; + } + hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); + } + else if (motion_step > 0) { + VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step; + + /* Motion, fill up previous steps that we might have skipped because + * they had no motion, but we need them anyway now. */ + for (int step = 0; step < motion_step; step++) { + float4 *mP = attr_mP->data_float4() + step * num_keys; + + for (int key = 0; key < num_keys; key++) { + mP[key] = float3_to_float4(hair->curve_keys[key]); + mP[key].w = hair->curve_radius[key]; + } + } + } +} + +static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step) +{ + VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step " << motion_step; /* find attribute */ - Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); bool new_attribute = false; /* add new attribute if it doesn't exist already */ if (!attr_mP) { VLOG(1) << "Creating new motion vertex position attribute"; - attr_mP = mesh->curve_attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); new_attribute = true; } /* export motion vectors for curve keys */ - size_t numkeys = mesh->curve_keys.size(); + size_t numkeys = hair->curve_keys.size(); float4 *mP = attr_mP->data_float4() + motion_step * numkeys; bool have_motion = false; int i = 0; @@ -740,24 +776,24 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) { /* Curve lengths may not match! Curves can be clipped. 
*/ - int curve_key_end = (num_curves + 1 < (int)mesh->curve_first_key.size() ? - mesh->curve_first_key[num_curves + 1] : - (int)mesh->curve_keys.size()); - const int num_center_curve_keys = curve_key_end - mesh->curve_first_key[num_curves]; + int curve_key_end = (num_curves + 1 < (int)hair->curve_first_key.size() ? + hair->curve_first_key[num_curves + 1] : + (int)hair->curve_keys.size()); + const int num_center_curve_keys = curve_key_end - hair->curve_first_key[num_curves]; const int is_num_keys_different = CData->curve_keynum[curve] - num_center_curve_keys; if (!is_num_keys_different) { for (int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) { - if (i < mesh->curve_keys.size()) { + if (i < hair->curve_keys.size()) { mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey); if (!have_motion) { /* unlike mesh coordinates, these tend to be slightly different * between frames due to particle transforms into/out of object * space, so we use an epsilon to detect actual changes */ - float4 curve_key = float3_to_float4(mesh->curve_keys[i]); - curve_key.w = mesh->curve_radius[i]; + float4 curve_key = float3_to_float4(hair->curve_keys[i]); + curve_key.w = hair->curve_radius[i]; if (len_squared(mP[i] - curve_key) > 1e-5f * 1e-5f) have_motion = true; } @@ -781,42 +817,17 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int } } - /* in case of new attribute, we verify if there really was any motion */ + /* In case of new attribute, we verify if there really was any motion. */ if (new_attribute) { - if (i != numkeys || !have_motion) { - /* No motion or hair "topology" changed, remove attributes again. 
*/ - if (i != numkeys) { - VLOG(1) << "Hair topology changed, removing attribute."; - } - else { - VLOG(1) << "No motion, removing attribute."; - } - mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); - } - else if (motion_step > 0) { - VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step; - /* motion, fill up previous steps that we might have skipped because - * they had no motion, but we need them anyway now */ - for (int step = 0; step < motion_step; step++) { - float4 *mP = attr_mP->data_float4() + step * numkeys; - - for (int key = 0; key < numkeys; key++) { - mP[key] = float3_to_float4(mesh->curve_keys[key]); - mP[key].w = mesh->curve_radius[key]; - } - } - } + export_hair_motion_validate_attribute(hair, motion_step, i, have_motion); } } -static void ExportCurveTriangleUV(ParticleCurveData *CData, - int vert_offset, - int resol, - float2 *uvdata) +static void ExportCurveTriangleUV(ParticleCurveData *CData, int resol, float2 *uvdata) { if (uvdata == NULL) return; - int vertexindex = vert_offset; + int vertexindex = 0; for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { for (int curve = CData->psys_firstcurve[sys]; @@ -844,15 +855,12 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData, } } -static void ExportCurveTriangleVcol(ParticleCurveData *CData, - int vert_offset, - int resol, - uchar4 *cdata) +static void ExportCurveTriangleVcol(ParticleCurveData *CData, int resol, uchar4 *cdata) { if (cdata == NULL) return; - int vertexindex = vert_offset; + int vertexindex = 0; for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { for (int curve = CData->psys_firstcurve[sys]; @@ -951,7 +959,7 @@ void BlenderSync::sync_curve_settings() if ((b_psys->settings().render_type() == BL::ParticleSettings::render_type_PATH) && (b_psys->settings().type() == BL::ParticleSettings::type_HAIR)) { BL::ID key = BKE_object_is_modified(*b_ob) ? 
*b_ob : b_ob->data(); - mesh_map.set_recalc(key); + geometry_map.set_recalc(key); object_map.set_recalc(*b_ob); } } @@ -963,42 +971,51 @@ void BlenderSync::sync_curve_settings() curve_system_manager->tag_update(scene); } -void BlenderSync::sync_curves( - Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step) +bool BlenderSync::object_has_particle_hair(BL::Object b_ob) { - if (!motion) { - /* Clear stored curve data */ - mesh->curve_keys.clear(); - mesh->curve_radius.clear(); - mesh->curve_first_key.clear(); - mesh->curve_shader.clear(); - mesh->curve_attributes.clear(); + /* Test if the object has a particle modifier with hair. */ + BL::Object::modifiers_iterator b_mod; + for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) { + if ((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) && + (preview ? b_mod->show_viewport() : b_mod->show_render())) { + BL::ParticleSystemModifier psmd((const PointerRNA)b_mod->ptr); + BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); + BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); + + if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && + (b_part.type() == BL::ParticleSettings::type_HAIR)) { + return true; + } + } } - /* obtain general settings */ - const bool use_curves = scene->curve_system_manager->use_curves; + return false; +} - if (!(use_curves && b_ob.mode() != b_ob.mode_PARTICLE_EDIT && b_ob.mode() != b_ob.mode_EDIT)) { - if (!motion) - mesh->compute_bounds(); +/* Old particle hair. */ +void BlenderSync::sync_particle_hair( + Geometry *geom, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step) +{ + Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL; + Mesh *mesh = (geom->type == Geometry::MESH) ? 
static_cast<Mesh *>(geom) : NULL; + + /* obtain general settings */ + if (b_ob.mode() == b_ob.mode_PARTICLE_EDIT || b_ob.mode() == b_ob.mode_EDIT) { return; } - const int primitive = scene->curve_system_manager->primitive; const int triangle_method = scene->curve_system_manager->triangle_method; const int resolution = scene->curve_system_manager->resolution; - const size_t vert_num = mesh->verts.size(); - const size_t tri_num = mesh->num_triangles(); int used_res = 1; /* extract particle hair data - should be combined with connecting to mesh later*/ ParticleCurveData CData; - ObtainCacheParticleData(mesh, &b_mesh, &b_ob, &CData, !preview); + ObtainCacheParticleData(geom, &b_mesh, &b_ob, &CData, !preview); /* add hair geometry to mesh */ - if (primitive == CURVE_TRIANGLES) { + if (mesh) { if (triangle_method == CURVE_CAMERA_TRIANGLES) { /* obtain camera parameters */ float3 RotCam; @@ -1022,31 +1039,31 @@ void BlenderSync::sync_curves( } else { if (motion) - ExportCurveSegmentsMotion(mesh, &CData, motion_step); + ExportCurveSegmentsMotion(hair, &CData, motion_step); else - ExportCurveSegments(scene, mesh, &CData); + ExportCurveSegments(scene, hair, &CData); } /* generated coordinates from first key. 
we should ideally get this from * blender to handle deforming objects */ if (!motion) { - if (mesh->need_attribute(scene, ATTR_STD_GENERATED)) { + if (geom->need_attribute(scene, ATTR_STD_GENERATED)) { float3 loc, size; mesh_texture_space(b_mesh, loc, size); - if (primitive == CURVE_TRIANGLES) { + if (mesh) { Attribute *attr_generated = mesh->attributes.add(ATTR_STD_GENERATED); float3 *generated = attr_generated->data_float3(); - for (size_t i = vert_num; i < mesh->verts.size(); i++) + for (size_t i = 0; i < mesh->verts.size(); i++) generated[i] = mesh->verts[i] * size - loc; } else { - Attribute *attr_generated = mesh->curve_attributes.add(ATTR_STD_GENERATED); + Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED); float3 *generated = attr_generated->data_float3(); - for (size_t i = 0; i < mesh->num_curves(); i++) { - float3 co = mesh->curve_keys[mesh->get_curve(i).first_key]; + for (size_t i = 0; i < hair->num_curves(); i++) { + float3 co = hair->curve_keys[hair->get_curve(i).first_key]; generated[i] = co * size - loc; } } @@ -1059,21 +1076,21 @@ void BlenderSync::sync_curves( int vcol_num = 0; for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l, vcol_num++) { - if (!mesh->need_attribute(scene, ustring(l->name().c_str()))) + if (!geom->need_attribute(scene, ustring(l->name().c_str()))) continue; - ObtainCacheParticleVcol(mesh, &b_mesh, &b_ob, &CData, !preview, vcol_num); + ObtainCacheParticleVcol(geom, &b_mesh, &b_ob, &CData, !preview, vcol_num); - if (primitive == CURVE_TRIANGLES) { + if (mesh) { Attribute *attr_vcol = mesh->attributes.add( ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE); uchar4 *cdata = attr_vcol->data_uchar4(); - ExportCurveTriangleVcol(&CData, tri_num * 3, used_res, cdata); + ExportCurveTriangleVcol(&CData, used_res, cdata); } else { - Attribute *attr_vcol = mesh->curve_attributes.add( + Attribute *attr_vcol = hair->attributes.add( ustring(l->name().c_str()), TypeDesc::TypeColor, 
ATTR_ELEMENT_CURVE); float3 *fdata = attr_vcol->data_float3(); @@ -1101,12 +1118,12 @@ void BlenderSync::sync_curves( ustring name = ustring(l->name().c_str()); /* UV map */ - if (mesh->need_attribute(scene, name) || mesh->need_attribute(scene, std)) { + if (geom->need_attribute(scene, name) || geom->need_attribute(scene, std)) { Attribute *attr_uv; - ObtainCacheParticleUV(mesh, &b_mesh, &b_ob, &CData, !preview, uv_num); + ObtainCacheParticleUV(geom, &b_mesh, &b_ob, &CData, !preview, uv_num); - if (primitive == CURVE_TRIANGLES) { + if (mesh) { if (active_render) attr_uv = mesh->attributes.add(std, name); else @@ -1114,13 +1131,13 @@ void BlenderSync::sync_curves( float2 *uv = attr_uv->data_float2(); - ExportCurveTriangleUV(&CData, tri_num * 3, used_res, uv); + ExportCurveTriangleUV(&CData, used_res, uv); } else { if (active_render) - attr_uv = mesh->curve_attributes.add(std, name); + attr_uv = hair->attributes.add(std, name); else - attr_uv = mesh->curve_attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE); + attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE); float2 *uv = attr_uv->data_float2(); @@ -1135,8 +1152,292 @@ void BlenderSync::sync_curves( } } } +} + +#ifdef WITH_NEW_OBJECT_TYPES +static float4 hair_point_as_float4(BL::HairPoint b_point) +{ + float4 mP = float3_to_float4(get_float3(b_point.co())); + mP.w = b_point.radius(); + return mP; +} + +static float4 interpolate_hair_points(BL::Hair b_hair, + const int first_point_index, + const int num_points, + const float step) +{ + const float curve_t = step * (num_points - 1); + const int point_a = clamp((int)curve_t, 0, num_points - 1); + const int point_b = min(point_a + 1, num_points - 1); + const float t = curve_t - (float)point_a; + return lerp(hair_point_as_float4(b_hair.points[first_point_index + point_a]), + hair_point_as_float4(b_hair.points[first_point_index + point_b]), + t); +} + +static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) +{ + /* TODO: optimize so 
we can straight memcpy arrays from Blender? */ + + /* Add requested attributes. */ + Attribute *attr_intercept = NULL; + Attribute *attr_random = NULL; + + if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) { + attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT); + } + if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) { + attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM); + } + + /* Reserve memory. */ + const int num_keys = b_hair.points.length(); + const int num_curves = b_hair.curves.length(); + + if (num_curves > 0) { + VLOG(1) << "Exporting curve segments for hair " << hair->name; + } + + hair->reserve_curves(num_curves, num_keys); + + /* Export curves and points. */ + vector<float> points_length; + + BL::Hair::curves_iterator b_curve_iter; + for (b_hair.curves.begin(b_curve_iter); b_curve_iter != b_hair.curves.end(); ++b_curve_iter) { + BL::HairCurve b_curve = *b_curve_iter; + const int first_point_index = b_curve.first_point_index(); + const int num_points = b_curve.num_points(); + + float3 prev_co = make_float3(0.0f, 0.0f, 0.0f); + float length = 0.0f; + if (attr_intercept) { + points_length.clear(); + points_length.reserve(num_points); + } + + /* Position and radius. */ + for (int i = 0; i < num_points; i++) { + BL::HairPoint b_point = b_hair.points[first_point_index + i]; + + const float3 co = get_float3(b_point.co()); + const float radius = b_point.radius(); + hair->add_curve_key(co, radius); + + if (attr_intercept) { + if (i > 0) { + length += len(co - prev_co); + points_length.push_back(length); + } + prev_co = co; + } + } + + /* Normalized 0..1 attribute along curve. */ + if (attr_intercept) { + for (int i = 0; i < num_points; i++) { + attr_intercept->add((length == 0.0f) ? 0.0f : points_length[i] / length); + } + } + + /* Random number per curve. */ + if (attr_random != NULL) { + attr_random->add(hash_uint2_to_float(b_curve.index(), 0)); + } + + /* Curve. 
*/ + const int shader_index = 0; + hair->add_curve(first_point_index, shader_index); + } +} + +static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step) +{ + VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step " + << motion_step; + + /* Find or add attribute. */ + Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + bool new_attribute = false; + + if (!attr_mP) { + VLOG(1) << "Creating new motion vertex position attribute"; + attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + new_attribute = true; + } + + /* Export motion keys. */ + const int num_keys = hair->curve_keys.size(); + float4 *mP = attr_mP->data_float4() + motion_step * num_keys; + bool have_motion = false; + int num_motion_keys = 0; + int curve_index = 0; + + BL::Hair::curves_iterator b_curve_iter; + for (b_hair.curves.begin(b_curve_iter); b_curve_iter != b_hair.curves.end(); ++b_curve_iter) { + BL::HairCurve b_curve = *b_curve_iter; + const int first_point_index = b_curve.first_point_index(); + const int num_points = b_curve.num_points(); + + Hair::Curve curve = hair->get_curve(curve_index); + curve_index++; + + if (num_points == curve.num_keys) { + /* Number of keys matches. */ + for (int i = 0; i < num_points; i++) { + int point_index = first_point_index + i; + + if (point_index < num_keys) { + mP[num_motion_keys] = hair_point_as_float4(b_hair.points[point_index]); + num_motion_keys++; + + if (!have_motion) { + /* TODO: use epsilon for comparison? Was needed for particles due to + * transform, but ideally should not happen anymore. */ + float4 curve_key = float3_to_float4(hair->curve_keys[i]); + curve_key.w = hair->curve_radius[i]; + have_motion = !(mP[i] == curve_key); + } + } + } + } + else { + /* Number of keys has changed. Generate an interpolated version + * to preserve motion blur. */ + const float step_size = curve.num_keys > 1 ? 
1.0f / (curve.num_keys - 1) : 0.0f; + for (int i = 0; i < curve.num_keys; i++) { + const float step = i * step_size; + mP[num_motion_keys] = interpolate_hair_points(b_hair, first_point_index, num_points, step); + num_motion_keys++; + } + have_motion = true; + } + } + + /* In case of new attribute, we verify if there really was any motion. */ + if (new_attribute) { + export_hair_motion_validate_attribute(hair, motion_step, num_motion_keys, have_motion); + } +} +#endif /* WITH_NEW_OBJECT_TYPES */ - mesh->compute_bounds(); +/* Hair object. */ +void BlenderSync::sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step) +{ +#ifdef WITH_NEW_OBJECT_TYPES + /* Convert Blender hair to Cycles curves. */ + BL::Hair b_hair(b_ob.data()); + if (motion) { + export_hair_curves_motion(hair, b_hair, motion_step); + } + else { + export_hair_curves(scene, hair, b_hair); + } +#else + (void)hair; + (void)b_ob; + (void)motion; + (void)motion_step; +#endif /* WITH_NEW_OBJECT_TYPES */ +} + +void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Geometry *geom, + const vector<Shader *> &used_shaders) +{ + Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL; + Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL; + + /* Compares curve_keys rather than strands in order to handle quick hair + * adjustments in dynamic BVH - other methods could probably do this better. */ + array<float3> oldcurve_keys; + array<float> oldcurve_radius; + array<int> oldtriangles; + if (hair) { + oldcurve_keys.steal_data(hair->curve_keys); + oldcurve_radius.steal_data(hair->curve_radius); + } + else { + oldtriangles.steal_data(mesh->triangles); + } + + geom->clear(); + geom->used_shaders = used_shaders; + + if (view_layer.use_hair && scene->curve_system_manager->use_curves) { +#ifdef WITH_NEW_OBJECT_TYPES + if (b_ob.type() == BL::Object::type_HAIR) { + /* Hair object. 
*/ + sync_hair(hair, b_ob, false); + assert(mesh == NULL); + } + else +#endif + { + /* Particle hair. */ + bool need_undeformed = geom->need_attribute(scene, ATTR_STD_GENERATED); + BL::Mesh b_mesh = object_to_mesh( + b_data, b_ob, b_depsgraph, need_undeformed, Mesh::SUBDIVISION_NONE); + + if (b_mesh) { + sync_particle_hair(geom, b_mesh, b_ob, false); + free_object_to_mesh(b_data, b_ob, b_mesh); + } + } + } + + /* tag update */ + const bool rebuild = (hair && ((oldcurve_keys != hair->curve_keys) || + (oldcurve_radius != hair->curve_radius))) || + (mesh && (oldtriangles != mesh->triangles)); + + geom->tag_update(scene, rebuild); +} + +void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Geometry *geom, + int motion_step) +{ + Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL; + Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL; + + /* Skip if nothing exported. */ + if ((hair && hair->num_keys() == 0) || (mesh && mesh->verts.size() == 0)) { + return; + } + + /* Export deformed coordinates. */ + if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) { +#ifdef WITH_NEW_OBJECT_TYPES + if (b_ob.type() == BL::Object::type_HAIR) { + /* Hair object. */ + sync_hair(hair, b_ob, true, motion_step); + assert(mesh == NULL); + return; + } + else +#endif + { + /* Particle hair. */ + BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE); + if (b_mesh) { + sync_particle_hair(geom, b_mesh, b_ob, true, motion_step); + free_object_to_mesh(b_data, b_ob, b_mesh); + return; + } + } + } + + /* No deformation on this frame, copy coordinates if other frames did have it. 
*/ + if (hair) { + hair->copy_center_to_motion_step(motion_step); + } + else { + mesh->copy_center_to_motion_step(motion_step); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp index 111fc8d5192..5140f190f36 100644 --- a/intern/cycles/blender/blender_device.cpp +++ b/intern/cycles/blender/blender_device.cpp @@ -17,8 +17,26 @@ #include "blender/blender_device.h" #include "blender/blender_util.h" +#include "util/util_foreach.h" + CCL_NAMESPACE_BEGIN +enum DenoiserType { + DENOISER_NONE = 0, + DENOISER_OPTIX = 1, + + DENOISER_NUM +}; + +enum ComputeDevice { + COMPUTE_DEVICE_CPU = 0, + COMPUTE_DEVICE_CUDA = 1, + COMPUTE_DEVICE_OPENCL = 2, + COMPUTE_DEVICE_OPTIX = 3, + + COMPUTE_DEVICE_NUM +}; + int blender_device_threads(BL::Scene &b_scene) { BL::RenderSettings b_r = b_scene.render(); @@ -40,7 +58,7 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen /* Find network device. */ vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK); if (!devices.empty()) { - device = devices.front(); + return devices.front(); } } else if (get_enum(cscene, "device") == 1) { @@ -57,14 +75,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen } /* Test if we are using GPU devices. */ - enum ComputeDevice { - COMPUTE_DEVICE_CPU = 0, - COMPUTE_DEVICE_CUDA = 1, - COMPUTE_DEVICE_OPENCL = 2, - COMPUTE_DEVICE_OPTIX = 3, - COMPUTE_DEVICE_NUM = 4, - }; - ComputeDevice compute_device = (ComputeDevice)get_enum( cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU); @@ -106,6 +116,34 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen } } + /* Ensure there is an OptiX device when using the OptiX denoiser. 
*/ + bool use_optix_denoising = get_enum(cscene, "preview_denoising", DENOISER_NUM, DENOISER_NONE) == + DENOISER_OPTIX && + !background; + BL::Scene::view_layers_iterator b_view_layer; + for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); + ++b_view_layer) { + PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles"); + if (get_boolean(crl, "use_optix_denoising")) { + use_optix_denoising = true; + } + } + + if (use_optix_denoising && device.type != DEVICE_OPTIX) { + vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX); + if (!optix_devices.empty()) { + /* Convert to a special multi device with separate denoising devices. */ + if (device.multi_devices.empty()) { + device.multi_devices.push_back(device); + } + + /* Simply use the first available OptiX device. */ + const DeviceInfo optix_device = optix_devices.front(); + device.id += optix_device.id; /* Uniquely identify this special multi device. */ + device.denoising_devices.push_back(optix_device); + } + } + return device; } diff --git a/intern/cycles/blender/blender_device.h b/intern/cycles/blender/blender_device.h index fd6c045c966..8d2ecac7483 100644 --- a/intern/cycles/blender/blender_device.h +++ b/intern/cycles/blender/blender_device.h @@ -18,9 +18,9 @@ #define __BLENDER_DEVICE_H__ #include "MEM_guardedalloc.h" -#include "RNA_types.h" #include "RNA_access.h" #include "RNA_blender_cpp.h" +#include "RNA_types.h" #include "device/device.h" diff --git a/intern/cycles/blender/blender_geometry.cpp b/intern/cycles/blender/blender_geometry.cpp new file mode 100644 index 00000000000..7ca35cff961 --- /dev/null +++ b/intern/cycles/blender/blender_geometry.cpp @@ -0,0 +1,192 @@ + +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/* Find or create the Cycles geometry for a Blender object and synchronize it
 * when needed.
 *
 * b_depsgraph:       forwarded to sync_hair() / sync_mesh().
 * b_ob:              object whose data, material slots and type are read.
 * b_ob_instance:     used as the map key instead of the object data when
 *                    BKE_object_is_modified(b_ob) is true.
 * object_updated:    forces a resync when the geometry has its transform applied.
 * use_particle_hair: part of the geometry key, and selects the hair sync path.
 *
 * Returns the geometry stored in geometry_map for the computed key. It is
 * returned without re-syncing when nothing relevant changed, or when it was
 * already synced earlier (it is present in geometry_synced). */
Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
                                     BL::Object &b_ob,
                                     BL::Object &b_ob_instance,
                                     bool object_updated,
                                     bool use_particle_hair)
{
  /* Test if we can instance or if the object is modified. */
  BL::ID b_ob_data = b_ob.data();
  BL::ID b_key_id = (BKE_object_is_modified(b_ob)) ? b_ob_instance : b_ob_data;
  GeometryKey key(b_key_id.ptr.data, use_particle_hair);
  BL::Material material_override = view_layer.material_override;
  /* Volume objects fall back to the default volume shader, everything else to
   * the default surface shader. */
  Shader *default_shader = (b_ob.type() == BL::Object::type_VOLUME) ? scene->default_volume :
                                                                      scene->default_surface;
  /* Hair geometry is only created when the curve primitive is not
   * CURVE_TRIANGLES; otherwise a Mesh is created.
   * NOTE(review): geom_type is only consulted when a new geometry is created
   * below; an existing map entry keeps the type it was created with. Confirm
   * this is intended if the curve primitive setting can change between syncs. */
#ifdef WITH_NEW_OBJECT_TYPES
  Geometry::Type geom_type = ((b_ob.type() == BL::Object::type_HAIR || use_particle_hair) &&
                              (scene->curve_system_manager->primitive != CURVE_TRIANGLES)) ?
                                 Geometry::HAIR :
                                 Geometry::MESH;
#else
  Geometry::Type geom_type = ((use_particle_hair) &&
                              (scene->curve_system_manager->primitive != CURVE_TRIANGLES)) ?
                                 Geometry::HAIR :
                                 Geometry::MESH;
#endif

  /* Find shader indices. */
  vector<Shader *> used_shaders;

  /* One entry per material slot; a view layer material override replaces
   * every slot's material. */
  BL::Object::material_slots_iterator slot;
  for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) {
    if (material_override) {
      find_shader(material_override, used_shaders, default_shader);
    }
    else {
      BL::ID b_material(slot->material());
      find_shader(b_material, used_shaders, default_shader);
    }
  }

  /* Objects without material slots still get exactly one shader. */
  if (used_shaders.size() == 0) {
    if (material_override)
      find_shader(material_override, used_shaders, default_shader);
    else
      used_shaders.push_back(default_shader);
  }

  /* Test if we need to sync. */
  Geometry *geom = geometry_map.find(key);
  bool sync = true;
  if (geom == NULL) {
    /* Add new geometry if it did not exist yet. */
    if (geom_type == Geometry::HAIR) {
      geom = new Hair();
    }
    else {
      geom = new Mesh();
    }
    geometry_map.add(key, geom);
  }
  else {
    /* Test if we need to update existing geometry. */
    sync = geometry_map.update(geom, b_key_id);
  }

  if (!sync) {
    /* The empty statements below are intentional: they fall through to the
     * sync code after this block. */

    /* If transform was applied to geometry, need full update. */
    if (object_updated && geom->transform_applied) {
      ;
    }
    /* Test if shaders changed, these can be object level so geometry
     * does not get tagged for recalc. */
    else if (geom->used_shaders != used_shaders) {
      ;
    }
    else {
      /* Even if not tagged for recalc, we may need to sync anyway
       * because the shader needs different geometry attributes. */
      bool attribute_recalc = false;

      foreach (Shader *shader, geom->used_shaders) {
        if (shader->need_update_geometry) {
          attribute_recalc = true;
        }
      }

      if (!attribute_recalc) {
        return geom;
      }
    }
  }

  /* Ensure we only sync instanced geometry once. */
  if (geometry_synced.find(geom) != geometry_synced.end()) {
    return geom;
  }

  progress.set_sync_status("Synchronizing object", b_ob.name());

  geometry_synced.insert(geom);

  geom->name = ustring(b_ob_data.name().c_str());

  /* Dispatch on the object kind. Note this tests use_particle_hair / object
   * type rather than geom_type.
   * NOTE(review): with CURVE_TRIANGLES the geometry passed to sync_hair() was
   * created as a Mesh; presumably sync_hair() handles both, confirm. */
#ifdef WITH_NEW_OBJECT_TYPES
  if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
#else
  if (use_particle_hair) {
#endif
    sync_hair(b_depsgraph, b_ob, geom, used_shaders);
  }
  else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
    Mesh *mesh = static_cast<Mesh *>(geom);
    sync_volume(b_ob, mesh, used_shaders);
  }
  else {
    Mesh *mesh = static_cast<Mesh *>(geom);
    sync_mesh(b_depsgraph, b_ob, mesh, used_shaders);
  }

  return geom;
}
*/ + } + else { + Mesh *mesh = static_cast<Mesh *>(geom); + sync_mesh_motion(b_depsgraph, b_ob, mesh, motion_step); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_id_map.h b/intern/cycles/blender/blender_id_map.h new file mode 100644 index 00000000000..3bc42e349ae --- /dev/null +++ b/intern/cycles/blender/blender_id_map.h @@ -0,0 +1,299 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_ID_MAP_H__ +#define __BLENDER_ID_MAP_H__ + +#include <string.h> + +#include "util/util_map.h" +#include "util/util_set.h" +#include "util/util_vector.h" + +CCL_NAMESPACE_BEGIN + +/* ID Map + * + * Utility class to map between Blender datablocks and Cycles data structures, + * and keep track of recalc tags from the dependency graph. */ + +template<typename K, typename T> class id_map { + public: + id_map(vector<T *> *scene_data_) + { + scene_data = scene_data_; + } + + T *find(const BL::ID &id) + { + return find(id.ptr.owner_id); + } + + T *find(const K &key) + { + if (b_map.find(key) != b_map.end()) { + T *data = b_map[key]; + return data; + } + + return NULL; + } + + void set_recalc(const BL::ID &id) + { + b_recalc.insert(id.ptr.data); + } + + void set_recalc(void *id_ptr) + { + b_recalc.insert(id_ptr); + } + + bool has_recalc() + { + return !(b_recalc.empty()); + } + + void pre_sync() + { + used_set.clear(); + } + + /* Add new data. 
*/ + void add(const K &key, T *data) + { + assert(find(key) == NULL); + scene_data->push_back(data); + b_map[key] = data; + used(data); + } + + /* Update existing data. */ + bool update(T *data, const BL::ID &id) + { + return update(data, id, id); + } + bool update(T *data, const BL::ID &id, const BL::ID &parent) + { + bool recalc = (b_recalc.find(id.ptr.data) != b_recalc.end()); + if (parent.ptr.data && parent.ptr.data != id.ptr.data) { + recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end()); + } + used(data); + return recalc; + } + + /* Combined add and update as needed. */ + bool add_or_update(T **r_data, const BL::ID &id) + { + return add_or_update(r_data, id, id, id.ptr.owner_id); + } + bool add_or_update(T **r_data, const BL::ID &id, const K &key) + { + return add_or_update(r_data, id, id, key); + } + bool add_or_update(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key) + { + T *data = find(key); + bool recalc; + + if (!data) { + /* Add data if it didn't exist yet. */ + data = new T(); + add(key, data); + recalc = true; + } + else { + /* check if updated needed. */ + recalc = update(data, id, parent); + } + + *r_data = data; + return recalc; + } + + /* Combined add or update for convenience. */ + + bool is_used(const K &key) + { + T *data = find(key); + return (data) ? 
used_set.find(data) != used_set.end() : false; + } + + void used(T *data) + { + /* tag data as still in use */ + used_set.insert(data); + } + + void set_default(T *data) + { + b_map[NULL] = data; + } + + bool post_sync(bool do_delete = true) + { + /* remove unused data */ + vector<T *> new_scene_data; + typename vector<T *>::iterator it; + bool deleted = false; + + for (it = scene_data->begin(); it != scene_data->end(); it++) { + T *data = *it; + + if (do_delete && used_set.find(data) == used_set.end()) { + delete data; + deleted = true; + } + else + new_scene_data.push_back(data); + } + + *scene_data = new_scene_data; + + /* update mapping */ + map<K, T *> new_map; + typedef pair<const K, T *> TMapPair; + typename map<K, T *>::iterator jt; + + for (jt = b_map.begin(); jt != b_map.end(); jt++) { + TMapPair &pair = *jt; + + if (used_set.find(pair.second) != used_set.end()) + new_map[pair.first] = pair.second; + } + + used_set.clear(); + b_recalc.clear(); + b_map = new_map; + + return deleted; + } + + const map<K, T *> &key_to_scene_data() + { + return b_map; + } + + protected: + vector<T *> *scene_data; + map<K, T *> b_map; + set<T *> used_set; + set<void *> b_recalc; +}; + +/* Object Key + * + * To uniquely identify instances, we use the parent, object and persistent instance ID. + * We also export separate object for a mesh and its particle hair. 
/* Number of ints in a persistent instance ID. */
enum { OBJECT_PERSISTENT_ID_SIZE = 16 };

/* Key identifying one object instance: parent, object, persistent instance ID
 * and whether this is the particle hair of the object. */
struct ObjectKey {
  void *parent;
  int id[OBJECT_PERSISTENT_ID_SIZE];
  void *ob;
  bool use_particle_hair;

  /* A NULL id_ is stored as an all-zero persistent ID. */
  ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_, bool use_particle_hair_)
      : parent(parent_), ob(ob_), use_particle_hair(use_particle_hair_)
  {
    if (id_ == NULL) {
      memset(id, 0, sizeof(id));
    }
    else {
      memcpy(id, id_, sizeof(id));
    }
  }

  /* Lexicographic order on (ob, parent, use_particle_hair, id). */
  bool operator<(const ObjectKey &k) const
  {
    if (ob != k.ob) {
      return ob < k.ob;
    }
    if (parent != k.parent) {
      return parent < k.parent;
    }
    if (use_particle_hair != k.use_particle_hair) {
      return use_particle_hair < k.use_particle_hair;
    }
    return memcmp(id, k.id, sizeof(id)) < 0;
  }
};

/* Geometry Key
 *
 * A mesh and its particle hair are exported as separate geometry, so the key
 * carries a flag next to the datablock pointer to tell them apart. */

struct GeometryKey {
  void *id;
  bool use_particle_hair;

  GeometryKey(void *id, bool use_particle_hair) : id(id), use_particle_hair(use_particle_hair)
  {
  }

  /* Lexicographic order on (id, use_particle_hair). */
  bool operator<(const GeometryKey &k) const
  {
    if (id != k.id) {
      return id < k.id;
    }
    return use_particle_hair < k.use_particle_hair;
  }
};

/* Particle System Key */

struct ParticleSystemKey {
  void *ob;
  int id[OBJECT_PERSISTENT_ID_SIZE];

  /* A NULL id_ is stored as an all-zero persistent ID. */
  ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
  {
    if (id_ == NULL) {
      memset(id, 0, sizeof(id));
    }
    else {
      memcpy(id, id_, sizeof(id));
    }
  }

  /* Order on (ob, id[1..]). The first id entry is the particle index and is
   * deliberately left out of the comparison. */
  bool operator<(const ParticleSystemKey &k) const
  {
    if (ob != k.ob) {
      return ob < k.ob;
    }
    return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0;
  }
};
/* Packed Images */

/* Create a loader for one frame of a Blender image datablock.
 *
 * free_cache is set when the image reports no data at construction time
 * (b_image.has_data() is false); load_pixels() then calls
 * b_image.buffers_free() after reading the pixels. */
BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
    : b_image(b_image), frame(frame), free_cache(!b_image.has_data())
{
}
*/ + metadata.colorspace = u_colorspace_raw; + } + else { + if (metadata.channels == 1) { + metadata.type = IMAGE_DATA_TYPE_BYTE; + } + else if (metadata.channels == 4) { + metadata.type = IMAGE_DATA_TYPE_BYTE4; + } + else { + return false; + } + } + + return true; +} + +bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) +{ + const size_t num_pixels = ((size_t)metadata.width) * metadata.height; + const int channels = metadata.channels; + const int tile = 0; /* TODO(lukas): Support tiles here? */ + + if (b_image.is_float()) { + /* image data */ + float *image_pixels; + image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); + + if (image_pixels && num_pixels * channels == pixels_size) { + memcpy(pixels, image_pixels, pixels_size * sizeof(float)); + } + else { + if (channels == 1) { + memset(pixels, 0, num_pixels * sizeof(float)); + } + else { + const size_t num_pixels_safe = pixels_size / channels; + float *fp = (float *)pixels; + for (int i = 0; i < num_pixels_safe; i++, fp += channels) { + fp[0] = 1.0f; + fp[1] = 0.0f; + fp[2] = 1.0f; + if (channels == 4) { + fp[3] = 1.0f; + } + } + } + } + + if (image_pixels) { + MEM_freeN(image_pixels); + } + } + else { + unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); + + if (image_pixels && num_pixels * channels == pixels_size) { + memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); + } + else { + if (channels == 1) { + memset(pixels, 0, pixels_size * sizeof(unsigned char)); + } + else { + const size_t num_pixels_safe = pixels_size / channels; + unsigned char *cp = (unsigned char *)pixels; + for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) { + cp[0] = 255; + cp[1] = 0; + cp[2] = 255; + if (channels == 4) { + cp[3] = 255; + } + } + } + } + + if (image_pixels) { + MEM_freeN(image_pixels); + } + + if (associate_alpha) { + /* Premultiply, byte images are always 
straight for Blender. */ + unsigned char *cp = (unsigned char *)pixels; + for (size_t i = 0; i < num_pixels; i++, cp += channels) { + cp[0] = (cp[0] * cp[3]) >> 8; + cp[1] = (cp[1] * cp[3]) >> 8; + cp[2] = (cp[2] * cp[3]) >> 8; + } + } + } + + /* Free image buffers to save memory during render. */ + if (free_cache) { + b_image.buffers_free(); + } + + return true; +} + +string BlenderImageLoader::name() const +{ + return BL::Image(b_image).name(); +} + +bool BlenderImageLoader::equals(const ImageLoader &other) const +{ + const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other; + return b_image == other_loader.b_image && frame == other_loader.frame; +} + +/* Point Density */ + +BlenderPointDensityLoader::BlenderPointDensityLoader(BL::Depsgraph b_depsgraph, + BL::ShaderNodeTexPointDensity b_node) + : b_depsgraph(b_depsgraph), b_node(b_node) +{ +} + +bool BlenderPointDensityLoader::load_metadata(ImageMetaData &metadata) +{ + metadata.channels = 4; + metadata.width = b_node.resolution(); + metadata.height = metadata.width; + metadata.depth = metadata.width; + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + return true; +} + +bool BlenderPointDensityLoader::load_pixels(const ImageMetaData &, + void *pixels, + const size_t, + const bool) +{ + int length; + b_node.calc_point_density(b_depsgraph, &length, (float **)&pixels); + return true; +} + +void BlenderSession::builtin_images_load() +{ + /* Force builtin images to be loaded along with Blender data sync. This + * is needed because we may be reading from depsgraph evaluated data which + * can be freed by Blender before Cycles reads it. + * + * TODO: the assumption that no further access to builtin image data will + * happen is really weak, and likely to break in the future. We should find + * a better solution to hand over the data directly to the image manager + * instead of through callbacks whose timing is difficult to control. 
*/ + ImageManager *manager = session->scene->image_manager; + Device *device = session->device; + manager->device_load_builtin(device, session->scene, session->progress); +} + +string BlenderPointDensityLoader::name() const +{ + return BL::ShaderNodeTexPointDensity(b_node).name(); +} + +bool BlenderPointDensityLoader::equals(const ImageLoader &other) const +{ + const BlenderPointDensityLoader &other_loader = (const BlenderPointDensityLoader &)other; + return b_node == other_loader.b_node && b_depsgraph == other_loader.b_depsgraph; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_image.h b/intern/cycles/blender/blender_image.h new file mode 100644 index 00000000000..b58a159a6ba --- /dev/null +++ b/intern/cycles/blender/blender_image.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/* Image loader that reads pixels from a Blender image datablock at a given
 * frame, as float or byte data depending on the image. */
class BlenderImageLoader : public ImageLoader {
 public:
  BlenderImageLoader(BL::Image b_image, int frame);

  /* Fill in size, channel count and data type; returns false for channel
   * counts other than 1 and 4. */
  bool load_metadata(ImageMetaData &metadata) override;
  /* Copy the pixels into the given buffer; pixels_size is in elements. */
  bool load_pixels(const ImageMetaData &metadata,
                   void *pixels,
                   const size_t pixels_size,
                   const bool associate_alpha) override;
  string name() const override;
  /* Compares image and frame. */
  bool equals(const ImageLoader &other) const override;

  BL::Image b_image;
  int frame;
  /* Free the Blender side image buffers after the pixels were loaded. */
  bool free_cache;
};

/* Image loader that evaluates a point density texture node into a cubic
 * float4 grid whose side length is the node resolution. */
class BlenderPointDensityLoader : public ImageLoader {
 public:
  BlenderPointDensityLoader(BL::Depsgraph depsgraph, BL::ShaderNodeTexPointDensity b_node);

  bool load_metadata(ImageMetaData &metadata) override;
  /* Calls calc_point_density() on the node; the size and alpha arguments are
   * not used by the implementation. */
  bool load_pixels(const ImageMetaData &metadata,
                   void *pixels,
                   const size_t pixels_size,
                   const bool associate_alpha) override;
  string name() const override;
  /* Compares node and depsgraph. */
  bool equals(const ImageLoader &other) const override;

  BL::Depsgraph b_depsgraph;
  BL::ShaderNodeTexPointDensity b_node;
};
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/light.h" + +#include "blender/blender_sync.h" +#include "blender/blender_util.h" + +#include "util/util_hash.h" + +CCL_NAMESPACE_BEGIN + +void BlenderSync::sync_light(BL::Object &b_parent, + int persistent_id[OBJECT_PERSISTENT_ID_SIZE], + BL::Object &b_ob, + BL::Object &b_ob_instance, + int random_id, + Transform &tfm, + bool *use_portal) +{ + /* test if we need to sync */ + Light *light; + ObjectKey key(b_parent, persistent_id, b_ob_instance, false); + BL::Light b_light(b_ob.data()); + + /* Update if either object or light data changed. */ + if (!light_map.add_or_update(&light, b_ob, b_parent, key)) { + Shader *shader; + if (!shader_map.add_or_update(&shader, b_light)) { + if (light->is_portal) + *use_portal = true; + return; + } + } + + /* type */ + switch (b_light.type()) { + case BL::Light::type_POINT: { + BL::PointLight b_point_light(b_light); + light->size = b_point_light.shadow_soft_size(); + light->type = LIGHT_POINT; + break; + } + case BL::Light::type_SPOT: { + BL::SpotLight b_spot_light(b_light); + light->size = b_spot_light.shadow_soft_size(); + light->type = LIGHT_SPOT; + light->spot_angle = b_spot_light.spot_size(); + light->spot_smooth = b_spot_light.spot_blend(); + break; + } + /* Hemi were removed from 2.8 */ + // case BL::Light::type_HEMI: { + // light->type = LIGHT_DISTANT; + // light->size = 0.0f; + // break; + // } + case BL::Light::type_SUN: { + BL::SunLight b_sun_light(b_light); + light->angle = b_sun_light.angle(); + light->type = LIGHT_DISTANT; + break; + } + case BL::Light::type_AREA: { + BL::AreaLight b_area_light(b_light); + light->size = 1.0f; + light->axisu = transform_get_column(&tfm, 0); + light->axisv = transform_get_column(&tfm, 1); + light->sizeu = b_area_light.size(); + switch (b_area_light.shape()) { + case BL::AreaLight::shape_SQUARE: + light->sizev = light->sizeu; + light->round = false; + break; 
+ case BL::AreaLight::shape_RECTANGLE: + light->sizev = b_area_light.size_y(); + light->round = false; + break; + case BL::AreaLight::shape_DISK: + light->sizev = light->sizeu; + light->round = true; + break; + case BL::AreaLight::shape_ELLIPSE: + light->sizev = b_area_light.size_y(); + light->round = true; + break; + } + light->type = LIGHT_AREA; + break; + } + } + + /* strength */ + light->strength = get_float3(b_light.color()); + light->strength *= BL::PointLight(b_light).energy(); + + /* location and (inverted!) direction */ + light->co = transform_get_column(&tfm, 3); + light->dir = -transform_get_column(&tfm, 2); + light->tfm = tfm; + + /* shader */ + vector<Shader *> used_shaders; + find_shader(b_light, used_shaders, scene->default_light); + light->shader = used_shaders[0]; + + /* shadow */ + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles"); + light->cast_shadow = get_boolean(clight, "cast_shadow"); + light->use_mis = get_boolean(clight, "use_multiple_importance_sampling"); + + int samples = get_int(clight, "samples"); + if (get_boolean(cscene, "use_square_samples")) + light->samples = samples * samples; + else + light->samples = samples; + + light->max_bounces = get_int(clight, "max_bounces"); + + if (b_ob != b_ob_instance) { + light->random_id = random_id; + } + else { + light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0); + } + + if (light->type == LIGHT_AREA) + light->is_portal = get_boolean(clight, "is_portal"); + else + light->is_portal = false; + + if (light->is_portal) + *use_portal = true; + + /* visibility */ + uint visibility = object_ray_visibility(b_ob); + light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0; + light->use_glossy = (visibility & PATH_RAY_GLOSSY) != 0; + light->use_transmission = (visibility & PATH_RAY_TRANSMIT) != 0; + light->use_scatter = (visibility & PATH_RAY_VOLUME_SCATTER) != 0; + + /* tag */ + light->tag_update(scene); +} + +void 
BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal) +{ + BL::World b_world = b_scene.world(); + + if (b_world) { + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles"); + + enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM }; + int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC); + bool sample_as_light = (sampling_method != SAMPLING_NONE); + + if (sample_as_light || use_portal) { + /* test if we need to sync */ + Light *light; + ObjectKey key(b_world, 0, b_world, false); + + if (light_map.add_or_update(&light, b_world, b_world, key) || world_recalc || + b_world.ptr.data != world_map) { + light->type = LIGHT_BACKGROUND; + if (sampling_method == SAMPLING_MANUAL) { + light->map_resolution = get_int(cworld, "sample_map_resolution"); + } + else { + light->map_resolution = 0; + } + light->shader = scene->default_background; + light->use_mis = sample_as_light; + light->max_bounces = get_int(cworld, "max_bounces"); + + /* force enable light again when world is resynced */ + light->is_enabled = true; + + int samples = get_int(cworld, "samples"); + if (get_boolean(cscene, "use_square_samples")) + light->samples = samples * samples; + else + light->samples = samples; + + light->tag_update(scene); + light_map.set_recalc(b_world); + } + } + } + + world_map = b_world.ptr.data; + world_recalc = false; + viewport_parameters = BlenderViewportParameters(b_v3d); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index b18f9a37948..a6f380a9ae7 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -14,25 +14,25 @@ * limitations under the License. 
*/ +#include "render/camera.h" #include "render/colorspace.h" #include "render/mesh.h" #include "render/object.h" #include "render/scene.h" -#include "render/camera.h" -#include "blender/blender_sync.h" #include "blender/blender_session.h" +#include "blender/blender_sync.h" #include "blender/blender_util.h" #include "subd/subd_patch.h" #include "subd/subd_split.h" #include "util/util_algorithm.h" +#include "util/util_disjoint_set.h" #include "util/util_foreach.h" #include "util/util_hash.h" #include "util/util_logging.h" #include "util/util_math.h" -#include "util/util_disjoint_set.h" #include "mikktspace.h" @@ -278,54 +278,6 @@ static void mikk_compute_tangents( genTangSpaceDefault(&context); } -/* Create Volume Attribute */ - -static void create_mesh_volume_attribute( - BL::Object &b_ob, Mesh *mesh, ImageManager *image_manager, AttributeStandard std, float frame) -{ - BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob); - - if (!b_domain) - return; - - mesh->volume_isovalue = b_domain.clipping(); - - Attribute *attr = mesh->attributes.add(std); - VoxelAttribute *volume_data = attr->data_voxel(); - ImageMetaData metadata; - bool animated = false; - - volume_data->manager = image_manager; - volume_data->slot = image_manager->add_image(Attribute::standard_name(std), - b_ob.ptr.data, - animated, - frame, - INTERPOLATION_LINEAR, - EXTENSION_CLIP, - IMAGE_ALPHA_AUTO, - u_colorspace_raw, - metadata); -} - -static void create_mesh_volume_attributes(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame) -{ - /* for smoke volume rendering */ - if (mesh->need_attribute(scene, ATTR_STD_VOLUME_DENSITY)) - create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_DENSITY, frame); - if (mesh->need_attribute(scene, ATTR_STD_VOLUME_COLOR)) - create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_COLOR, frame); - if (mesh->need_attribute(scene, ATTR_STD_VOLUME_FLAME)) - create_mesh_volume_attribute(b_ob, mesh, 
scene->image_manager, ATTR_STD_VOLUME_FLAME, frame); - if (mesh->need_attribute(scene, ATTR_STD_VOLUME_HEAT)) - create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_HEAT, frame); - if (mesh->need_attribute(scene, ATTR_STD_VOLUME_TEMPERATURE)) - create_mesh_volume_attribute( - b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_TEMPERATURE, frame); - if (mesh->need_attribute(scene, ATTR_STD_VOLUME_VELOCITY)) - create_mesh_volume_attribute( - b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY, frame); -} - /* Create vertex color attributes. */ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision) { @@ -333,14 +285,27 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, BL::Mesh::vertex_colors_iterator l; for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) { - if (!mesh->need_attribute(scene, ustring(l->name().c_str()))) + const bool active_render = l->active_render(); + AttributeStandard vcol_std = (active_render) ? 
ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE; + ustring vcol_name = ustring(l->name().c_str()); + + const bool need_vcol = mesh->need_attribute(scene, vcol_name) || + mesh->need_attribute(scene, vcol_std); + + if (!need_vcol) { continue; + } - Attribute *attr = mesh->subd_attributes.add( - ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + Attribute *vcol_attr = NULL; + if (active_render) { + vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name); + } + else { + vcol_attr = mesh->subd_attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + } BL::Mesh::polygons_iterator p; - uchar4 *cdata = attr->data_uchar4(); + uchar4 *cdata = vcol_attr->data_uchar4(); for (b_mesh.polygons.begin(p); p != b_mesh.polygons.end(); ++p) { int n = p->loop_total(); @@ -355,14 +320,27 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, else { BL::Mesh::vertex_colors_iterator l; for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) { - if (!mesh->need_attribute(scene, ustring(l->name().c_str()))) + const bool active_render = l->active_render(); + AttributeStandard vcol_std = (active_render) ? 
ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE; + ustring vcol_name = ustring(l->name().c_str()); + + const bool need_vcol = mesh->need_attribute(scene, vcol_name) || + mesh->need_attribute(scene, vcol_std); + + if (!need_vcol) { continue; + } - Attribute *attr = mesh->attributes.add( - ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + Attribute *vcol_attr = NULL; + if (active_render) { + vcol_attr = mesh->attributes.add(vcol_std, vcol_name); + } + else { + vcol_attr = mesh->attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + } BL::Mesh::loop_triangles_iterator t; - uchar4 *cdata = attr->data_uchar4(); + uchar4 *cdata = vcol_attr->data_uchar4(); for (b_mesh.loop_triangles.begin(t); t != b_mesh.loop_triangles.end(); ++t) { int3 li = get_int3(t->loops()); @@ -859,9 +837,9 @@ static void create_mesh(Scene *scene, attr_create_uv_map(scene, mesh, b_mesh); } - /* for volume objects, create a matrix to transform from object space to + /* For volume objects, create a matrix to transform from object space to * mesh texture space. this does not work with deformations but that can - * probably only be done well with a volume grid mapping of coordinates */ + * probably only be done well with a volume grid mapping of coordinates. 
*/ if (mesh->need_attribute(scene, ATTR_STD_GENERATED_TRANSFORM)) { Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED_TRANSFORM); Transform *tfm = attr->data_transform(); @@ -930,7 +908,7 @@ static void sync_mesh_fluid_motion(BL::Object &b_ob, Scene *scene, Mesh *mesh) if (scene->need_motion() == Scene::MOTION_NONE) return; - BL::FluidDomainSettings b_fluid_domain = object_fluid_domain_find(b_ob); + BL::FluidDomainSettings b_fluid_domain = object_fluid_liquid_domain_find(b_ob); if (!b_fluid_domain) return; @@ -963,82 +941,11 @@ static void sync_mesh_fluid_motion(BL::Object &b_ob, Scene *scene, Mesh *mesh) } } -Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph, - BL::Object &b_ob, - BL::Object &b_ob_instance, - bool object_updated, - bool show_self, - bool show_particles) +void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Mesh *mesh, + const vector<Shader *> &used_shaders) { - /* test if we can instance or if the object is modified */ - BL::ID b_ob_data = b_ob.data(); - BL::ID key = (BKE_object_is_modified(b_ob)) ? 
b_ob_instance : b_ob_data; - BL::Material material_override = view_layer.material_override; - - /* find shader indices */ - vector<Shader *> used_shaders; - - BL::Object::material_slots_iterator slot; - for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot) { - if (material_override) { - find_shader(material_override, used_shaders, scene->default_surface); - } - else { - BL::ID b_material(slot->material()); - find_shader(b_material, used_shaders, scene->default_surface); - } - } - - if (used_shaders.size() == 0) { - if (material_override) - find_shader(material_override, used_shaders, scene->default_surface); - else - used_shaders.push_back(scene->default_surface); - } - - /* test if we need to sync */ - int requested_geometry_flags = Mesh::GEOMETRY_NONE; - if (view_layer.use_surfaces) { - requested_geometry_flags |= Mesh::GEOMETRY_TRIANGLES; - } - if (view_layer.use_hair) { - requested_geometry_flags |= Mesh::GEOMETRY_CURVES; - } - Mesh *mesh; - - if (!mesh_map.sync(&mesh, key)) { - /* if transform was applied to mesh, need full update */ - if (object_updated && mesh->transform_applied) - ; - /* test if shaders changed, these can be object level so mesh - * does not get tagged for recalc */ - else if (mesh->used_shaders != used_shaders) - ; - else if (requested_geometry_flags != mesh->geometry_flags) - ; - else { - /* even if not tagged for recalc, we may need to sync anyway - * because the shader needs different mesh attributes */ - bool attribute_recalc = false; - - foreach (Shader *shader, mesh->used_shaders) - if (shader->need_update_mesh) - attribute_recalc = true; - - if (!attribute_recalc) - return mesh; - } - } - - /* ensure we only sync instanced meshes once */ - if (mesh_synced.find(mesh) != mesh_synced.end()) - return mesh; - - progress.set_sync_status("Synchronizing object", b_ob.name()); - - mesh_synced.insert(mesh); - - /* create derived mesh */ array<int> oldtriangles; array<Mesh::SubdFace> oldsubd_faces; array<int> 
oldsubd_face_corners; @@ -1046,150 +953,73 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph, oldsubd_faces.steal_data(mesh->subd_faces); oldsubd_face_corners.steal_data(mesh->subd_face_corners); - /* compares curve_keys rather than strands in order to handle quick hair - * adjustments in dynamic BVH - other methods could probably do this better*/ - array<float3> oldcurve_keys; - array<float> oldcurve_radius; - oldcurve_keys.steal_data(mesh->curve_keys); - oldcurve_radius.steal_data(mesh->curve_radius); - - /* ensure bvh rebuild (instead of refit) if has_voxel_attributes() changed */ - bool oldhas_voxel_attributes = mesh->has_voxel_attributes(); - mesh->clear(); mesh->used_shaders = used_shaders; - mesh->name = ustring(b_ob_data.name().c_str()); - if (requested_geometry_flags != Mesh::GEOMETRY_NONE) { + mesh->subdivision_type = Mesh::SUBDIVISION_NONE; + + if (view_layer.use_surfaces) { /* Adaptive subdivision setup. Not for baking since that requires * exact mapping to the Blender mesh. */ - if (scene->bake_manager->get_baking()) { - mesh->subdivision_type = Mesh::SUBDIVISION_NONE; - } - else { + if (!scene->bake_manager->get_baking()) { mesh->subdivision_type = object_subdivision_type(b_ob, preview, experimental); } /* For some reason, meshes do not need this... */ bool need_undeformed = mesh->need_attribute(scene, ATTR_STD_GENERATED); - BL::Mesh b_mesh = object_to_mesh( b_data, b_ob, b_depsgraph, need_undeformed, mesh->subdivision_type); if (b_mesh) { /* Sync mesh itself. */ - if (view_layer.use_surfaces && show_self) { - if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) - create_subd_mesh(scene, mesh, b_ob, b_mesh, used_shaders, dicing_rate, max_subdivisions); - else - create_mesh(scene, mesh, b_mesh, used_shaders, false); - - create_mesh_volume_attributes(scene, b_ob, mesh, b_scene.frame_current()); - } - - /* Sync hair curves. 
*/ - if (view_layer.use_hair && show_particles && - mesh->subdivision_type == Mesh::SUBDIVISION_NONE) { - sync_curves(mesh, b_mesh, b_ob, false); - } + if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) + create_subd_mesh( + scene, mesh, b_ob, b_mesh, mesh->used_shaders, dicing_rate, max_subdivisions); + else + create_mesh(scene, mesh, b_mesh, mesh->used_shaders, false); free_object_to_mesh(b_data, b_ob, b_mesh); } } - mesh->geometry_flags = requested_geometry_flags; /* mesh fluid motion mantaflow */ sync_mesh_fluid_motion(b_ob, scene, mesh); /* tag update */ bool rebuild = (oldtriangles != mesh->triangles) || (oldsubd_faces != mesh->subd_faces) || - (oldsubd_face_corners != mesh->subd_face_corners) || - (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius) || - (oldhas_voxel_attributes != mesh->has_voxel_attributes()); + (oldsubd_face_corners != mesh->subd_face_corners); mesh->tag_update(scene, rebuild); - - return mesh; } -void BlenderSync::sync_mesh_motion(BL::Depsgraph &b_depsgraph, - BL::Object &b_ob, - Object *object, - float motion_time) +void BlenderSync::sync_mesh_motion(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Mesh *mesh, + int motion_step) { - /* ensure we only sync instanced meshes once */ - Mesh *mesh = object->mesh; - - if (mesh_motion_synced.find(mesh) != mesh_motion_synced.end()) - return; - - mesh_motion_synced.insert(mesh); - - /* ensure we only motion sync meshes that also had mesh synced, to avoid - * unnecessary work and to ensure that its attributes were clear */ - if (mesh_synced.find(mesh) == mesh_synced.end()) - return; - - /* Find time matching motion step required by mesh. */ - int motion_step = mesh->motion_step(motion_time); - if (motion_step < 0) { + /* Fluid motion blur already exported. 
*/ + BL::FluidDomainSettings b_fluid_domain = object_fluid_liquid_domain_find(b_ob); + if (b_fluid_domain) { return; } - /* skip empty meshes */ - const size_t numverts = mesh->verts.size(); - const size_t numkeys = mesh->curve_keys.size(); - - if (!numverts && !numkeys) + /* Skip if no vertices were exported. */ + size_t numverts = mesh->verts.size(); + if (numverts == 0) { return; + } - /* skip objects without deforming modifiers. this is not totally reliable, - * would need a more extensive check to see which objects are animated */ + /* Skip objects without deforming modifiers. this is not totally reliable, + * would need a more extensive check to see which objects are animated. */ BL::Mesh b_mesh(PointerRNA_NULL); - - /* manta motion is exported immediate with mesh, skip here */ - BL::FluidDomainSettings b_fluid_domain = object_fluid_domain_find(b_ob); - if (b_fluid_domain) - return; - if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) { /* get derived mesh */ b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE); } - if (!b_mesh) { - /* if we have no motion blur on this frame, but on other frames, copy */ - if (numverts) { - /* triangles */ - Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - - if (attr_mP) { - Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); - Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL); - float3 *P = &mesh->verts[0]; - float3 *N = (attr_N) ? 
attr_N->data_float3() : NULL; - - memcpy(attr_mP->data_float3() + motion_step * numverts, P, sizeof(float3) * numverts); - if (attr_mN) - memcpy(attr_mN->data_float3() + motion_step * numverts, N, sizeof(float3) * numverts); - } - } - - if (numkeys) { - /* curves */ - Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - - if (attr_mP) { - float3 *keys = &mesh->curve_keys[0]; - memcpy(attr_mP->data_float3() + motion_step * numkeys, keys, sizeof(float3) * numkeys); - } - } - - return; - } - /* TODO(sergey): Perform preliminary check for number of vertices. */ - if (numverts) { + if (b_mesh) { + /* Export deformed coordinates. */ /* Find attributes. */ Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); @@ -1254,14 +1084,13 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph &b_depsgraph, } } } - } - /* hair motion */ - if (numkeys) - sync_curves(mesh, b_mesh, b_ob, true, motion_step); + free_object_to_mesh(b_data, b_ob, b_mesh); + return; + } - /* free derived mesh */ - free_object_to_mesh(b_data, b_ob, b_mesh); + /* No deformation on this frame, copy coordinates if other frames did have it. 
*/ + mesh->copy_center_to_motion_step(motion_step); } CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 6981412bb88..4b29c28913b 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -15,14 +15,14 @@ */ #include "render/camera.h" -#include "render/integrator.h" #include "render/graph.h" +#include "render/integrator.h" #include "render/light.h" #include "render/mesh.h" -#include "render/object.h" -#include "render/scene.h" #include "render/nodes.h" +#include "render/object.h" #include "render/particles.h" +#include "render/scene.h" #include "render/shader.h" #include "blender/blender_object_cull.h" @@ -67,10 +67,20 @@ bool BlenderSync::object_is_mesh(BL::Object &b_ob) return false; } - if (b_ob.type() == BL::Object::type_CURVE) { + BL::Object::type_enum type = b_ob.type(); + +#ifdef WITH_NEW_OBJECT_TYPES + if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) { +#else + if (type == BL::Object::type_VOLUME) { +#endif + /* Will be exported attached to mesh. */ + return true; + } + else if (type == BL::Object::type_CURVE) { /* Skip exporting curves without faces, overhead can be * significant if there are many for path animation. */ - BL::Curve b_curve(b_ob.data()); + BL::Curve b_curve(b_ob_data); return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f || b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length()); @@ -88,215 +98,13 @@ bool BlenderSync::object_is_light(BL::Object &b_ob) return (b_ob_data && b_ob_data.is_a(&RNA_Light)); } -static uint object_ray_visibility(BL::Object &b_ob) -{ - PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility"); - uint flag = 0; - - flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0; - flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0; - flag |= get_boolean(cvisibility, "glossy") ? 
PATH_RAY_GLOSSY : 0; - flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0; - flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0; - flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0; - - return flag; -} - -/* Light */ - -void BlenderSync::sync_light(BL::Object &b_parent, - int persistent_id[OBJECT_PERSISTENT_ID_SIZE], - BL::Object &b_ob, - BL::Object &b_ob_instance, - int random_id, - Transform &tfm, - bool *use_portal) -{ - /* test if we need to sync */ - Light *light; - ObjectKey key(b_parent, persistent_id, b_ob_instance); - BL::Light b_light(b_ob.data()); - - /* Update if either object or light data changed. */ - if (!light_map.sync(&light, b_ob, b_parent, key)) { - Shader *shader; - if (!shader_map.sync(&shader, b_light)) { - if (light->is_portal) - *use_portal = true; - return; - } - } - - /* type */ - switch (b_light.type()) { - case BL::Light::type_POINT: { - BL::PointLight b_point_light(b_light); - light->size = b_point_light.shadow_soft_size(); - light->type = LIGHT_POINT; - break; - } - case BL::Light::type_SPOT: { - BL::SpotLight b_spot_light(b_light); - light->size = b_spot_light.shadow_soft_size(); - light->type = LIGHT_SPOT; - light->spot_angle = b_spot_light.spot_size(); - light->spot_smooth = b_spot_light.spot_blend(); - break; - } - /* Hemi were removed from 2.8 */ - // case BL::Light::type_HEMI: { - // light->type = LIGHT_DISTANT; - // light->size = 0.0f; - // break; - // } - case BL::Light::type_SUN: { - BL::SunLight b_sun_light(b_light); - light->angle = b_sun_light.angle(); - light->type = LIGHT_DISTANT; - break; - } - case BL::Light::type_AREA: { - BL::AreaLight b_area_light(b_light); - light->size = 1.0f; - light->axisu = transform_get_column(&tfm, 0); - light->axisv = transform_get_column(&tfm, 1); - light->sizeu = b_area_light.size(); - switch (b_area_light.shape()) { - case BL::AreaLight::shape_SQUARE: - light->sizev = light->sizeu; - light->round = false; - break; - case 
BL::AreaLight::shape_RECTANGLE: - light->sizev = b_area_light.size_y(); - light->round = false; - break; - case BL::AreaLight::shape_DISK: - light->sizev = light->sizeu; - light->round = true; - break; - case BL::AreaLight::shape_ELLIPSE: - light->sizev = b_area_light.size_y(); - light->round = true; - break; - } - light->type = LIGHT_AREA; - break; - } - } - - /* strength */ - light->strength = get_float3(b_light.color()); - light->strength *= BL::PointLight(b_light).energy(); - - /* location and (inverted!) direction */ - light->co = transform_get_column(&tfm, 3); - light->dir = -transform_get_column(&tfm, 2); - light->tfm = tfm; - - /* shader */ - vector<Shader *> used_shaders; - find_shader(b_light, used_shaders, scene->default_light); - light->shader = used_shaders[0]; - - /* shadow */ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles"); - light->cast_shadow = get_boolean(clight, "cast_shadow"); - light->use_mis = get_boolean(clight, "use_multiple_importance_sampling"); - - int samples = get_int(clight, "samples"); - if (get_boolean(cscene, "use_square_samples")) - light->samples = samples * samples; - else - light->samples = samples; - - light->max_bounces = get_int(clight, "max_bounces"); - - if (b_ob != b_ob_instance) { - light->random_id = random_id; - } - else { - light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0); - } - - if (light->type == LIGHT_AREA) - light->is_portal = get_boolean(clight, "is_portal"); - else - light->is_portal = false; - - if (light->is_portal) - *use_portal = true; - - /* visibility */ - uint visibility = object_ray_visibility(b_ob); - light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0; - light->use_glossy = (visibility & PATH_RAY_GLOSSY) != 0; - light->use_transmission = (visibility & PATH_RAY_TRANSMIT) != 0; - light->use_scatter = (visibility & PATH_RAY_VOLUME_SCATTER) != 0; - - /* tag */ - light->tag_update(scene); -} - -void 
BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal) -{ - BL::World b_world = b_scene.world(); - - if (b_world) { - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles"); - - enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM }; - int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC); - bool sample_as_light = (sampling_method != SAMPLING_NONE); - - if (sample_as_light || use_portal) { - /* test if we need to sync */ - Light *light; - ObjectKey key(b_world, 0, b_world); - - if (light_map.sync(&light, b_world, b_world, key) || world_recalc || - b_world.ptr.data != world_map) { - light->type = LIGHT_BACKGROUND; - if (sampling_method == SAMPLING_MANUAL) { - light->map_resolution = get_int(cworld, "sample_map_resolution"); - } - else { - light->map_resolution = 0; - } - light->shader = scene->default_background; - light->use_mis = sample_as_light; - light->max_bounces = get_int(cworld, "max_bounces"); - - /* force enable light again when world is resynced */ - light->is_enabled = true; - - int samples = get_int(cworld, "samples"); - if (get_boolean(cscene, "use_square_samples")) - light->samples = samples * samples; - else - light->samples = samples; - - light->tag_update(scene); - light_map.set_recalc(b_world); - } - } - } - - world_map = b_world.ptr.data; - world_recalc = false; - viewport_parameters = BlenderViewportParameters(b_v3d); -} - /* Object */ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, BL::ViewLayer &b_view_layer, BL::DepsgraphObjectInstance &b_instance, float motion_time, - bool show_self, - bool show_particles, + bool use_particle_hair, bool show_lights, BlenderObjectCulling &culling, bool *use_portal) @@ -378,7 +186,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, } /* key to lookup object */ - ObjectKey key(b_parent, persistent_id, b_ob_instance); + 
ObjectKey key(b_parent, persistent_id, b_ob_instance, use_particle_hair); Object *object; /* motion vector case */ @@ -393,8 +201,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, } /* mesh deformation */ - if (object->mesh) - sync_mesh_motion(b_depsgraph, b_ob, object, motion_time); + if (object->geometry) + sync_geometry_motion(b_depsgraph, b_ob, object, motion_time, use_particle_hair); } return object; @@ -403,12 +211,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, /* test if we need to sync */ bool object_updated = false; - if (object_map.sync(&object, b_ob, b_parent, key)) + if (object_map.add_or_update(&object, b_ob, b_parent, key)) object_updated = true; /* mesh sync */ - object->mesh = sync_mesh( - b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles); + object->geometry = sync_geometry( + b_depsgraph, b_ob, b_ob_instance, object_updated, use_particle_hair); /* special case not tracked by object update flags */ @@ -450,7 +258,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, /* object sync * transform comparison should not be needed, but duplis don't work perfect * in the depsgraph and may not signal changes, so this is a workaround */ - if (object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) { + if (object_updated || (object->geometry && object->geometry->need_update) || + tfm != object->tfm) { object->name = b_ob.name().c_str(); object->pass_id = b_ob.pass_index(); object->color = get_float3(b_ob.color()); @@ -459,23 +268,23 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, /* motion blur */ Scene::MotionType need_motion = scene->need_motion(); - if (need_motion != Scene::MOTION_NONE && object->mesh) { - Mesh *mesh = object->mesh; - mesh->use_motion_blur = false; - mesh->motion_steps = 0; + if (need_motion != Scene::MOTION_NONE && object->geometry) { + Geometry *geom = object->geometry; + geom->use_motion_blur = false; + 
geom->motion_steps = 0; uint motion_steps; if (need_motion == Scene::MOTION_BLUR) { - motion_steps = object_motion_steps(b_parent, b_ob); - mesh->motion_steps = motion_steps; + motion_steps = object_motion_steps(b_parent, b_ob, Object::MAX_MOTION_STEPS); + geom->motion_steps = motion_steps; if (motion_steps && object_use_deform_motion(b_parent, b_ob)) { - mesh->use_motion_blur = true; + geom->use_motion_blur = true; } } else { motion_steps = 3; - mesh->motion_steps = motion_steps; + geom->motion_steps = motion_steps; } object->motion.clear(); @@ -526,13 +335,13 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, if (!motion) { /* prepare for sync */ light_map.pre_sync(); - mesh_map.pre_sync(); + geometry_map.pre_sync(); object_map.pre_sync(); particle_system_map.pre_sync(); motion_times.clear(); } else { - mesh_motion_synced.clear(); + geometry_motion_synced.clear(); } /* initialize culling */ @@ -552,22 +361,34 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, BL::DepsgraphObjectInstance b_instance = *b_instance_iter; BL::Object b_ob = b_instance.object(); - /* load per-object culling data */ + /* Viewport visibility. */ + const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d); + if (show_in_viewport == false) { + continue; + } + + /* Load per-object culling data. */ culling.init_object(scene, b_ob); - /* test if object needs to be hidden */ - const bool show_self = b_instance.show_self(); - const bool show_particles = b_instance.show_particles(); - const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d); + /* Object itself. */ + if (b_instance.show_self()) { + sync_object(b_depsgraph, + b_view_layer, + b_instance, + motion_time, + false, + show_lights, + culling, + &use_portal); + } - if (show_in_viewport && (show_self || show_particles)) { - /* object itself */ + /* Particle hair as separate object. 
*/ + if (b_instance.show_particles() && object_has_particle_hair(b_ob)) { sync_object(b_depsgraph, b_view_layer, b_instance, motion_time, - show_self, - show_particles, + true, show_lights, culling, &use_portal); @@ -584,8 +405,8 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, /* handle removed data and modified pointers */ if (light_map.post_sync()) scene->light_manager->tag_update(scene); - if (mesh_map.post_sync()) - scene->mesh_manager->tag_update(scene); + if (geometry_map.post_sync()) + scene->geometry_manager->tag_update(scene); if (object_map.post_sync()) scene->object_manager->tag_update(scene); if (particle_system_map.post_sync()) @@ -593,7 +414,7 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, } if (motion) - mesh_motion_synced.clear(); + geometry_motion_synced.clear(); } void BlenderSync::sync_motion(BL::RenderSettings &b_render, diff --git a/intern/cycles/blender/blender_object_cull.cpp b/intern/cycles/blender/blender_object_cull.cpp index 74f8fb1dc53..bebecb364eb 100644 --- a/intern/cycles/blender/blender_object_cull.cpp +++ b/intern/cycles/blender/blender_object_cull.cpp @@ -19,6 +19,7 @@ #include "render/camera.h" #include "blender/blender_object_cull.h" +#include "blender/blender_util.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp index d74f132ed60..e5eab1ae62b 100644 --- a/intern/cycles/blender/blender_particles.cpp +++ b/intern/cycles/blender/blender_particles.cpp @@ -39,7 +39,7 @@ bool BlenderSync::sync_dupli_particle(BL::Object &b_ob, object->hide_on_missing_motion = true; /* test if we need particle data */ - if (!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE)) + if (!object->geometry->need_attribute(scene, ATTR_STD_PARTICLE)) return false; /* don't handle child particles yet */ @@ -53,10 +53,10 @@ bool BlenderSync::sync_dupli_particle(BL::Object &b_ob, ParticleSystem *psys; bool first_use = !particle_system_map.is_used(key); - 
bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key); + bool need_update = particle_system_map.add_or_update(&psys, b_ob, b_instance.object(), key); /* no update needed? */ - if (!need_update && !object->mesh->need_update && !scene->object_manager->need_update) + if (!need_update && !object->geometry->need_update && !scene->object_manager->need_update) return true; /* first time used in this sync loop? clear and tag update */ diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 335d4daf09c..89bcebda193 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -19,8 +19,9 @@ #include "blender/CCL_api.h" #include "blender/blender_device.h" -#include "blender/blender_sync.h" #include "blender/blender_session.h" +#include "blender/blender_sync.h" +#include "blender/blender_util.h" #include "render/denoising.h" #include "render/merge.h" @@ -37,8 +38,8 @@ #ifdef WITH_OSL # include "render/osl.h" -# include <OSL/oslquery.h> # include <OSL/oslconfig.h> +# include <OSL/oslquery.h> #endif #ifdef WITH_OPENCL diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index e2dea24fdd1..5ea96d6bdfd 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -41,8 +41,8 @@ #include "util/util_progress.h" #include "util/util_time.h" -#include "blender/blender_sync.h" #include "blender/blender_session.h" +#include "blender/blender_sync.h" #include "blender/blender_util.h" CCL_NAMESPACE_BEGIN @@ -138,14 +138,6 @@ void BlenderSession::create_session() scene = new Scene(scene_params, session->device); scene->name = b_scene.name(); - /* setup callbacks for builtin image support */ - scene->image_manager->builtin_image_info_cb = function_bind( - &BlenderSession::builtin_image_info, this, _1, _2, _3); - scene->image_manager->builtin_image_pixels_cb = function_bind( - 
&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5, _6, _7); - scene->image_manager->builtin_image_float_pixels_cb = function_bind( - &BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5, _6, _7); - session->scene = scene; /* There is no single depsgraph to use for the entire render. @@ -166,7 +158,7 @@ void BlenderSession::create_session() /* set buffer parameters */ BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); session->reset(buffer_params, session_params.samples); b_engine.use_highlight_tiles(session_params.progressive_refine == false); @@ -244,7 +236,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL); BL::RegionView3D b_null_region_view3d(PointerRNA_NULL); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); + b_scene, b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); session->reset(buffer_params, session_params.samples); b_engine.use_highlight_tiles(session_params.progressive_refine == false); @@ -278,8 +270,6 @@ static ShaderEvalType get_shader_type(const string &pass_type) return SHADER_EVAL_GLOSSY_COLOR; else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0) return SHADER_EVAL_TRANSMISSION_COLOR; - else if (strcmp(shader_type, "SUBSURFACE_COLOR") == 0) - return SHADER_EVAL_SUBSURFACE_COLOR; else if (strcmp(shader_type, "EMIT") == 0) return SHADER_EVAL_EMISSION; @@ -296,8 +286,6 @@ static ShaderEvalType get_shader_type(const string &pass_type) return SHADER_EVAL_GLOSSY; else if (strcmp(shader_type, "TRANSMISSION") == 0) return SHADER_EVAL_TRANSMISSION; - else if (strcmp(shader_type, "SUBSURFACE") == 0) - return SHADER_EVAL_SUBSURFACE; /* extra */ else if (strcmp(shader_type, 
"ENVIRONMENT") == 0) @@ -460,7 +448,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); /* render each layer */ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); @@ -474,7 +462,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) b_rlay_name = b_view_layer.name(); /* add passes */ - vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer); + vector<Pass> passes = sync->sync_render_passes( + b_rlay, b_view_layer, session_params.adaptive_sampling); buffer_params.passes = passes; PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); @@ -640,8 +629,6 @@ static int bake_pass_filter_get(const int pass_filter) flag |= BAKE_FILTER_GLOSSY; if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0) flag |= BAKE_FILTER_TRANSMISSION; - if ((pass_filter & BL::BakeSettings::pass_filter_SUBSURFACE) != 0) - flag |= BAKE_FILTER_SUBSURFACE; if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0) flag |= BAKE_FILTER_EMISSION; @@ -706,7 +693,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y()); @@ -720,9 +707,12 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, int tri_offset = 0; for (size_t i = 0; i < scene->objects.size(); i++) { - if (strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) { + const Object *object = 
scene->objects[i]; + const Geometry *geom = object->geometry; + if (object->name == b_object.name() && geom->type == Geometry::MESH) { + const Mesh *mesh = static_cast<const Mesh *>(geom); object_index = i; - tri_offset = scene->objects[i]->mesh->tri_offset; + tri_offset = mesh->prim_offset; break; } } @@ -848,11 +838,11 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) if (session->params.modified(session_params) || scene->params.modified(scene_params)) { free_session(); create_session(); - return; } /* increase samples, but never decrease */ session->set_samples(session_params.samples); + session->set_denoising_start_sample(session_params.denoising_start_sample); session->set_pause(session_pause); /* copy recalc flags, outside of mutex so we can decide to do the real @@ -883,10 +873,28 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) else sync->sync_camera(b_render, b_camera_override, width, height, ""); + /* get buffer parameters */ + BufferParams buffer_params = BlenderSync::get_buffer_params( + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); + + if (session_params.device.type != DEVICE_OPTIX && + session_params.device.denoising_devices.empty()) { + /* cannot use OptiX denoising when it is not supported by the device. */ + buffer_params.denoising_data_pass = false; + } + else { + session->set_denoising(buffer_params.denoising_data_pass, true); + } + + if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) { + scene->film->denoising_data_pass = buffer_params.denoising_data_pass; + + /* Force a scene and session reset below. */ + scene->film->tag_update(scene); + } + /* reset if needed */ if (scene->need_reset()) { - BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); session->reset(buffer_params, session_params.samples); /* After session reset, so device is not accessing image data anymore. 
*/ @@ -953,7 +961,7 @@ bool BlenderSession::draw(int w, int h) SessionParams session_params = BlenderSync::get_session_params( b_engine, b_userpref, b_scene, background); BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); bool session_pause = BlenderSync::get_session_pause(b_scene, background); if (session_pause == false) { @@ -971,7 +979,7 @@ bool BlenderSession::draw(int w, int h) /* draw */ BufferParams buffer_params = BlenderSync::get_buffer_params( - b_render, b_v3d, b_rv3d, scene->camera, width, height); + b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height); DeviceDrawParams draw_params; if (session->params.display_buffer_linear) { @@ -1112,341 +1120,6 @@ void BlenderSession::test_cancel() session->progress.set_cancel("Cancelled"); } -/* builtin image file name is actually an image datablock name with - * absolute sequence frame number concatenated via '@' character - * - * this function splits frame from builtin name - */ -int BlenderSession::builtin_image_frame(const string &builtin_name) -{ - int last = builtin_name.find_last_of('@'); - return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str()); -} - -void BlenderSession::builtin_image_info(const string &builtin_name, - void *builtin_data, - ImageMetaData &metadata) -{ - /* empty image */ - metadata.width = 1; - metadata.height = 1; - - if (!builtin_data) - return; - - /* recover ID pointer */ - PointerRNA ptr; - RNA_id_pointer_create((ID *)builtin_data, &ptr); - BL::ID b_id(ptr); - - if (b_id.is_a(&RNA_Image)) { - /* image data */ - BL::Image b_image(b_id); - - metadata.builtin_free_cache = !b_image.has_data(); - metadata.is_float = b_image.is_float(); - metadata.width = b_image.size()[0]; - metadata.height = b_image.size()[1]; - metadata.depth = 1; - metadata.channels = b_image.channels(); - - if (metadata.is_float) { - /* Float images 
are already converted on the Blender side, - * no need to do anything in Cycles. */ - metadata.colorspace = u_colorspace_raw; - } - } - else if (b_id.is_a(&RNA_Object)) { - /* smoke volume data */ - BL::Object b_ob(b_id); - BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob); - - metadata.is_float = true; - metadata.depth = 1; - metadata.channels = 1; - - if (!b_domain) - return; - - if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) || - builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME) || - builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT) || - builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) - metadata.channels = 1; - else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) - metadata.channels = 4; - else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) - metadata.channels = 3; - else - return; - - int3 resolution = get_int3(b_domain.domain_resolution()); - int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; - - /* Velocity and heat data is always low-resolution. */ - if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) || - builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { - amplify = 1; - } - - metadata.width = resolution.x * amplify; - metadata.height = resolution.y * amplify; - metadata.depth = resolution.z * amplify; - } - else { - /* TODO(sergey): Check we're indeed in shader node tree. 
*/ - PointerRNA ptr; - RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr); - BL::Node b_node(ptr); - if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { - BL::ShaderNodeTexPointDensity b_point_density_node(b_node); - metadata.channels = 4; - metadata.width = b_point_density_node.resolution(); - metadata.height = metadata.width; - metadata.depth = metadata.width; - metadata.is_float = true; - } - } -} - -bool BlenderSession::builtin_image_pixels(const string &builtin_name, - void *builtin_data, - int tile, - unsigned char *pixels, - const size_t pixels_size, - const bool associate_alpha, - const bool free_cache) -{ - if (!builtin_data) { - return false; - } - - const int frame = builtin_image_frame(builtin_name); - - PointerRNA ptr; - RNA_id_pointer_create((ID *)builtin_data, &ptr); - BL::Image b_image(ptr); - - const int width = b_image.size()[0]; - const int height = b_image.size()[1]; - const int channels = b_image.channels(); - - unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); - const size_t num_pixels = ((size_t)width) * height; - - if (image_pixels && num_pixels * channels == pixels_size) { - memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); - } - else { - if (channels == 1) { - memset(pixels, 0, pixels_size * sizeof(unsigned char)); - } - else { - const size_t num_pixels_safe = pixels_size / channels; - unsigned char *cp = pixels; - for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) { - cp[0] = 255; - cp[1] = 0; - cp[2] = 255; - if (channels == 4) { - cp[3] = 255; - } - } - } - } - - if (image_pixels) { - MEM_freeN(image_pixels); - } - - /* Free image buffers to save memory during render. */ - if (free_cache) { - b_image.buffers_free(); - } - - if (associate_alpha) { - /* Premultiply, byte images are always straight for Blender. 
*/ - unsigned char *cp = pixels; - for (size_t i = 0; i < num_pixels; i++, cp += channels) { - cp[0] = (cp[0] * cp[3]) >> 8; - cp[1] = (cp[1] * cp[3]) >> 8; - cp[2] = (cp[2] * cp[3]) >> 8; - } - } - return true; -} - -bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, - void *builtin_data, - int tile, - float *pixels, - const size_t pixels_size, - const bool, - const bool free_cache) -{ - if (!builtin_data) { - return false; - } - - PointerRNA ptr; - RNA_id_pointer_create((ID *)builtin_data, &ptr); - BL::ID b_id(ptr); - - if (b_id.is_a(&RNA_Image)) { - /* image data */ - BL::Image b_image(b_id); - int frame = builtin_image_frame(builtin_name); - - const int width = b_image.size()[0]; - const int height = b_image.size()[1]; - const int channels = b_image.channels(); - - float *image_pixels; - image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); - const size_t num_pixels = ((size_t)width) * height; - - if (image_pixels && num_pixels * channels == pixels_size) { - memcpy(pixels, image_pixels, pixels_size * sizeof(float)); - } - else { - if (channels == 1) { - memset(pixels, 0, num_pixels * sizeof(float)); - } - else { - const size_t num_pixels_safe = pixels_size / channels; - float *fp = pixels; - for (int i = 0; i < num_pixels_safe; i++, fp += channels) { - fp[0] = 1.0f; - fp[1] = 0.0f; - fp[2] = 1.0f; - if (channels == 4) { - fp[3] = 1.0f; - } - } - } - } - - if (image_pixels) { - MEM_freeN(image_pixels); - } - - /* Free image buffers to save memory during render. */ - if (free_cache) { - b_image.buffers_free(); - } - - return true; - } - else if (b_id.is_a(&RNA_Object)) { - /* smoke volume data */ - BL::Object b_ob(b_id); - BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob); - - if (!b_domain) { - return false; - } -#if WITH_FLUID - int3 resolution = get_int3(b_domain.domain_resolution()); - int length, amplify = (b_domain.use_noise()) ? 
b_domain.noise_scale() : 1; - - /* Velocity and heat data is always low-resolution. */ - if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) || - builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { - amplify = 1; - } - - const int width = resolution.x * amplify; - const int height = resolution.y * amplify; - const int depth = resolution.z * amplify; - const size_t num_pixels = ((size_t)width) * height * depth; - - if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) { - FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_density_grid_get(&b_domain.ptr, pixels); - return true; - } - } - else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) { - /* this is in range 0..1, and interpreted by the OpenGL smoke viewer - * as 1500..3000 K with the first part faded to zero density */ - FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_flame_grid_get(&b_domain.ptr, pixels); - return true; - } - } - else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) { - /* the RGB is "premultiplied" by density for better interpolation results */ - FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels * 4) { - FluidDomainSettings_color_grid_get(&b_domain.ptr, pixels); - return true; - } - } - else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) { - FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels * 3) { - FluidDomainSettings_velocity_grid_get(&b_domain.ptr, pixels); - return true; - } - } - else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { - FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_heat_grid_get(&b_domain.ptr, pixels); - return true; - } - } - else if 
(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) { - FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_temperature_grid_get(&b_domain.ptr, pixels); - return true; - } - } - else { - fprintf( - stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str()); - pixels[0] = 0.0f; - return false; - } -#endif - fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n"); - } - else { - /* We originally were passing view_layer here but in reality we need a - * a depsgraph to pass to the RE_point_density_minmax() function. - */ - /* TODO(sergey): Check we're indeed in shader node tree. */ - PointerRNA ptr; - RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr); - BL::Node b_node(ptr); - if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { - BL::ShaderNodeTexPointDensity b_point_density_node(b_node); - int length; - b_point_density_node.calc_point_density(b_depsgraph, &length, &pixels); - } - } - - return false; -} - -void BlenderSession::builtin_images_load() -{ - /* Force builtin images to be loaded along with Blender data sync. This - * is needed because we may be reading from depsgraph evaluated data which - * can be freed by Blender before Cycles reads it. - * - * TODO: the assumption that no further access to builtin image data will - * happen is really weak, and likely to break in the future. We should find - * a better solution to hand over the data directly to the image manager - * instead of through callbacks whose timing is difficult to control. 
*/ - ImageManager *manager = session->scene->image_manager; - Device *device = session->device; - manager->device_load_builtin(device, session->scene, session->progress); -} - void BlenderSession::update_resumable_tile_manager(int num_samples) { const int num_resumable_chunks = BlenderSession::num_resumable_chunks, diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index 2f25ec740f9..3e6498bb655 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -17,15 +17,19 @@ #ifndef __BLENDER_SESSION_H__ #define __BLENDER_SESSION_H__ +#include "RNA_blender_cpp.h" + #include "device/device.h" + +#include "render/bake.h" #include "render/scene.h" #include "render/session.h" -#include "render/bake.h" #include "util/util_vector.h" CCL_NAMESPACE_BEGIN +class BlenderSync; class ImageMetaData; class Scene; class Session; @@ -153,22 +157,6 @@ class BlenderSession { bool do_update_only); void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight); - int builtin_image_frame(const string &builtin_name); - void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata); - bool builtin_image_pixels(const string &builtin_name, - void *builtin_data, - int tile, - unsigned char *pixels, - const size_t pixels_size, - const bool associate_alpha, - const bool free_cache); - bool builtin_image_float_pixels(const string &builtin_name, - void *builtin_data, - int tile, - float *pixels, - const size_t pixels_size, - const bool associate_alpha, - const bool free_cache); void builtin_images_load(); /* Update tile manager to reflect resumable render settings. 
*/ diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 206058259af..edde1fd243e 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -23,14 +23,15 @@ #include "render/scene.h" #include "render/shader.h" -#include "blender/blender_texture.h" +#include "blender/blender_image.h" #include "blender/blender_sync.h" +#include "blender/blender_texture.h" #include "blender/blender_util.h" #include "util/util_debug.h" #include "util/util_foreach.h" -#include "util/util_string.h" #include "util/util_set.h" +#include "util/util_string.h" #include "util/util_task.h" CCL_NAMESPACE_BEGIN @@ -323,6 +324,13 @@ static ShaderNode *add_node(Scene *scene, vector_math_node->type = (NodeVectorMathType)b_vector_math_node.operation(); node = vector_math_node; } + else if (b_node.is_a(&RNA_ShaderNodeVectorRotate)) { + BL::ShaderNodeVectorRotate b_vector_rotate_node(b_node); + VectorRotateNode *vector_rotate_node = new VectorRotateNode(); + vector_rotate_node->type = (NodeVectorRotateType)b_vector_rotate_node.rotation_type(); + vector_rotate_node->invert = b_vector_rotate_node.invert(); + node = vector_rotate_node; + } else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) { BL::ShaderNodeVectorTransform b_vector_transform_node(b_node); VectorTransformNode *vtransform = new VectorTransformNode(); @@ -612,16 +620,16 @@ static ShaderNode *add_node(Scene *scene, /* create script node */ BL::ShaderNodeScript b_script_node(b_node); - OSLShaderManager *manager = (OSLShaderManager *)scene->shader_manager; + ShaderManager *manager = scene->shader_manager; string bytecode_hash = b_script_node.bytecode_hash(); if (!bytecode_hash.empty()) { - node = manager->osl_node("", bytecode_hash, b_script_node.bytecode()); + node = OSLShaderManager::osl_node(manager, "", bytecode_hash, b_script_node.bytecode()); } else { string absolute_filepath = blender_absolute_path( b_data, b_ntree, b_script_node.filepath()); 
- node = manager->osl_node(absolute_filepath, ""); + node = OSLShaderManager::osl_node(manager, absolute_filepath, ""); } } #else @@ -634,7 +642,27 @@ static ShaderNode *add_node(Scene *scene, BL::Image b_image(b_image_node.image()); BL::ImageUser b_image_user(b_image_node.image_user()); ImageTextureNode *image = new ImageTextureNode(); + + image->interpolation = get_image_interpolation(b_image_node); + image->extension = get_image_extension(b_image_node); + image->projection = (NodeImageProjection)b_image_node.projection(); + image->projection_blend = b_image_node.projection_blend(); + BL::TexMapping b_texture_mapping(b_image_node.texture_mapping()); + get_tex_mapping(&image->tex_mapping, b_texture_mapping); + if (b_image) { + PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr; + image->colorspace = get_enum_identifier(colorspace_ptr, "name"); + + image->animated = b_image_node.image_user().use_auto_refresh(); + image->alpha_type = get_image_alpha_type(b_image); + + image->tiles.clear(); + BL::Image::tiles_iterator b_iter; + for (b_image.tiles.begin(b_iter); b_iter != b_image.tiles.end(); ++b_iter) { + image->tiles.push_back(b_iter->number()); + } + /* builtin images will use callback-based reading because * they could only be loaded correct from blender side */ @@ -651,46 +679,14 @@ static ShaderNode *add_node(Scene *scene, */ int scene_frame = b_scene.frame_current(); int image_frame = image_user_frame_number(b_image_user, scene_frame); - image->filename = b_image.name() + "@" + string_printf("%d", image_frame); - image->builtin_data = b_image.ptr.data; + image->handle = scene->image_manager->add_image( + new BlenderImageLoader(b_image, image_frame), image->image_params()); } else { image->filename = image_user_file_path( b_image_user, b_image, b_scene.frame_current(), true); - image->builtin_data = NULL; - } - - PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr; - image->colorspace = get_enum_identifier(colorspace_ptr, "name"); - - 
image->animated = b_image_node.image_user().use_auto_refresh(); - image->alpha_type = get_image_alpha_type(b_image); - - image->tiles.clear(); - BL::Image::tiles_iterator b_iter; - for (b_image.tiles.begin(b_iter); b_iter != b_image.tiles.end(); ++b_iter) { - image->tiles.push_back(b_iter->number()); } - - /* TODO: restore */ - /* TODO(sergey): Does not work properly when we change builtin type. */ -#if 0 - if (b_image.is_updated()) { - scene->image_manager->tag_reload_image(image->filename.string(), - image->builtin_data, - get_image_interpolation(b_image_node), - get_image_extension(b_image_node), - image->use_alpha, - image->colorspace); - } -#endif } - image->projection = (NodeImageProjection)b_image_node.projection(); - image->interpolation = get_image_interpolation(b_image_node); - image->extension = get_image_extension(b_image_node); - image->projection_blend = b_image_node.projection_blend(); - BL::TexMapping b_texture_mapping(b_image_node.texture_mapping()); - get_tex_mapping(&image->tex_mapping, b_texture_mapping); node = image; } else if (b_node.is_a(&RNA_ShaderNodeTexEnvironment)) { @@ -698,7 +694,19 @@ static ShaderNode *add_node(Scene *scene, BL::Image b_image(b_env_node.image()); BL::ImageUser b_image_user(b_env_node.image_user()); EnvironmentTextureNode *env = new EnvironmentTextureNode(); + + env->interpolation = get_image_interpolation(b_env_node); + env->projection = (NodeEnvironmentProjection)b_env_node.projection(); + BL::TexMapping b_texture_mapping(b_env_node.texture_mapping()); + get_tex_mapping(&env->tex_mapping, b_texture_mapping); + if (b_image) { + PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr; + env->colorspace = get_enum_identifier(colorspace_ptr, "name"); + + env->animated = b_env_node.image_user().use_auto_refresh(); + env->alpha_type = get_image_alpha_type(b_image); + bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED || b_image.source() == BL::Image::source_MOVIE || 
(b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE); @@ -706,38 +714,14 @@ static ShaderNode *add_node(Scene *scene, if (is_builtin) { int scene_frame = b_scene.frame_current(); int image_frame = image_user_frame_number(b_image_user, scene_frame); - env->filename = b_image.name() + "@" + string_printf("%d", image_frame); - env->builtin_data = b_image.ptr.data; + env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame), + env->image_params()); } else { env->filename = image_user_file_path( b_image_user, b_image, b_scene.frame_current(), false); - env->builtin_data = NULL; - } - - PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr; - env->colorspace = get_enum_identifier(colorspace_ptr, "name"); - - env->animated = b_env_node.image_user().use_auto_refresh(); - env->alpha_type = get_image_alpha_type(b_image); - - /* TODO: restore */ - /* TODO(sergey): Does not work properly when we change builtin type. */ -#if 0 - if (b_image.is_updated()) { - scene->image_manager->tag_reload_image(env->filename.string(), - env->builtin_data, - get_image_interpolation(b_env_node), - EXTENSION_REPEAT, - env->use_alpha, - env->colorspace); } -#endif } - env->interpolation = get_image_interpolation(b_env_node); - env->projection = (NodeEnvironmentProjection)b_env_node.projection(); - BL::TexMapping b_texture_mapping(b_env_node.texture_mapping()); - get_tex_mapping(&env->tex_mapping, b_texture_mapping); node = env; } else if (b_node.is_a(&RNA_ShaderNodeTexGradient)) { @@ -770,6 +754,8 @@ static ShaderNode *add_node(Scene *scene, BL::ShaderNodeTexWave b_wave_node(b_node); WaveTextureNode *wave = new WaveTextureNode(); wave->type = (NodeWaveType)b_wave_node.wave_type(); + wave->bands_direction = (NodeWaveBandsDirection)b_wave_node.bands_direction(); + wave->rings_direction = (NodeWaveRingsDirection)b_wave_node.rings_direction(); wave->profile = (NodeWaveProfile)b_wave_node.wave_profile(); BL::TexMapping 
b_texture_mapping(b_wave_node.texture_mapping()); get_tex_mapping(&wave->tex_mapping, b_texture_mapping); @@ -878,23 +864,13 @@ static ShaderNode *add_node(Scene *scene, else if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { BL::ShaderNodeTexPointDensity b_point_density_node(b_node); PointDensityTextureNode *point_density = new PointDensityTextureNode(); - point_density->filename = b_point_density_node.name(); point_density->space = (NodeTexVoxelSpace)b_point_density_node.space(); point_density->interpolation = get_image_interpolation(b_point_density_node); - point_density->builtin_data = b_point_density_node.ptr.data; - point_density->image_manager = scene->image_manager; - - /* TODO(sergey): Use more proper update flag. */ - if (true) { - point_density->add_image(); - b_point_density_node.cache_point_density(b_depsgraph); - scene->image_manager->tag_reload_image(point_density->filename.string(), - point_density->builtin_data, - point_density->interpolation, - EXTENSION_CLIP, - IMAGE_ALPHA_AUTO, - u_colorspace_raw); - } + point_density->handle = scene->image_manager->add_image( + new BlenderPointDensityLoader(b_depsgraph, b_point_density_node), + point_density->image_params()); + + b_point_density_node.cache_point_density(b_depsgraph); node = point_density; /* Transformation form world space to texture space. 
@@ -1255,7 +1231,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all) Shader *shader; /* test if we need to sync */ - if (shader_map.sync(&shader, b_mat) || shader->need_sync_object || update_all) { + if (shader_map.add_or_update(&shader, b_mat) || shader->need_sync_object || update_all) { ShaderGraph *graph = new ShaderGraph(); shader->name = b_mat.name().c_str(); @@ -1284,6 +1260,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all) shader->heterogeneous_volume = !get_boolean(cmat, "homogeneous_volume"); shader->volume_sampling_method = get_volume_sampling(cmat); shader->volume_interpolation_method = get_volume_interpolation(cmat); + shader->volume_step_rate = get_float(cmat, "volume_step_rate"); shader->displacement_method = get_displacement_method(cmat); shader->set_graph(graph); @@ -1348,6 +1325,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, shader->heterogeneous_volume = !get_boolean(cworld, "homogeneous_volume"); shader->volume_sampling_method = get_volume_sampling(cworld); shader->volume_interpolation_method = get_volume_interpolation(cworld); + shader->volume_step_rate = get_float(cworld, "volume_step_size"); } else if (new_viewport_parameters.use_scene_world && b_world) { BackgroundNode *background = new BackgroundNode(); @@ -1480,7 +1458,7 @@ void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all) Shader *shader; /* test if we need to sync */ - if (shader_map.sync(&shader, b_light) || update_all) { + if (shader_map.add_or_update(&shader, b_light) || update_all) { ShaderGraph *graph = new ShaderGraph(); /* create nodes */ diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 20dbe23cdb7..28a737c3341 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -16,6 +16,7 @@ #include "render/background.h" #include "render/camera.h" +#include "render/curves.h" 
#include "render/film.h" #include "render/graph.h" #include "render/integrator.h" @@ -25,19 +26,18 @@ #include "render/object.h" #include "render/scene.h" #include "render/shader.h" -#include "render/curves.h" #include "device/device.h" #include "blender/blender_device.h" -#include "blender/blender_sync.h" #include "blender/blender_session.h" +#include "blender/blender_sync.h" #include "blender/blender_util.h" #include "util/util_debug.h" #include "util/util_foreach.h" -#include "util/util_opengl.h" #include "util/util_hash.h" +#include "util/util_opengl.h" CCL_NAMESPACE_BEGIN @@ -56,7 +56,7 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine, b_scene(b_scene), shader_map(&scene->shaders), object_map(&scene->objects), - mesh_map(&scene->meshes), + geometry_map(&scene->geometry), light_map(&scene->lights), particle_system_map(&scene->particle_systems), world_map(NULL), @@ -108,10 +108,15 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d } if (dicing_prop_changed) { - for (const pair<void *, Mesh *> &iter : mesh_map.key_to_scene_data()) { - Mesh *mesh = iter.second; - if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { - mesh_map.set_recalc(iter.first); + for (const pair<GeometryKey, Geometry *> &iter : geometry_map.key_to_scene_data()) { + Geometry *geom = iter.second; + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { + PointerRNA id_ptr; + RNA_id_pointer_create((::ID *)iter.first.id, &id_ptr); + geometry_map.set_recalc(BL::ID(id_ptr)); + } } } } @@ -146,7 +151,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d if (updated_geometry || (object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) { BL::ID key = BKE_object_is_modified(b_ob) ? 
b_ob : b_ob.data(); - mesh_map.set_recalc(key); + geometry_map.set_recalc(key); } } else if (object_is_light(b_ob)) { @@ -164,7 +169,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d /* Mesh */ else if (b_id.is_a(&RNA_Mesh)) { BL::Mesh b_mesh(b_id); - mesh_map.set_recalc(b_mesh); + geometry_map.set_recalc(b_mesh); } /* World */ else if (b_id.is_a(&RNA_World)) { @@ -173,6 +178,11 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d world_recalc = true; } } + /* Volume */ + else if (b_id.is_a(&RNA_Volume)) { + BL::Volume b_volume(b_id); + geometry_map.set_recalc(b_volume); + } } BlenderViewportParameters new_viewport_parameters(b_v3d); @@ -211,7 +221,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render, sync_images(); sync_curve_settings(); - mesh_synced.clear(); /* use for objects and motion sync */ + geometry_synced.clear(); /* use for objects and motion sync */ if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE || scene->camera->motion_position == Camera::MOTION_POSITION_CENTER) { @@ -219,7 +229,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render, } sync_motion(b_render, b_depsgraph, b_v3d, b_override, width, height, python_thread_state); - mesh_synced.clear(); + geometry_synced.clear(); /* Shader sync done at the end, since object sync uses it. * false = don't delete unused shaders, not supported. */ @@ -252,7 +262,8 @@ void BlenderSync::sync_integrator() integrator->transparent_max_bounce = get_int(cscene, "transparent_max_bounces"); integrator->volume_max_steps = get_int(cscene, "volume_max_steps"); - integrator->volume_step_size = get_float(cscene, "volume_step_size"); + integrator->volume_step_rate = (preview) ? 
get_float(cscene, "volume_preview_step_rate") : + get_float(cscene, "volume_step_rate"); integrator->caustics_reflective = get_boolean(cscene, "caustics_reflective"); integrator->caustics_refractive = get_boolean(cscene, "caustics_refractive"); @@ -291,6 +302,16 @@ void BlenderSync::sync_integrator() integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect"); integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold"); + if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) { + integrator->sampling_pattern = SAMPLING_PATTERN_PMJ; + integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples"); + integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold"); + } + else { + integrator->adaptive_min_samples = INT_MAX; + integrator->adaptive_threshold = 0.0f; + } + int diffuse_samples = get_int(cscene, "diffuse_samples"); int glossy_samples = get_int(cscene, "glossy_samples"); int transmission_samples = get_int(cscene, "transmission_samples"); @@ -307,6 +328,8 @@ void BlenderSync::sync_integrator() integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples; integrator->subsurface_samples = subsurface_samples * subsurface_samples; integrator->volume_samples = volume_samples * volume_samples; + integrator->adaptive_min_samples = min( + integrator->adaptive_min_samples * integrator->adaptive_min_samples, INT_MAX); } else { integrator->diffuse_samples = diffuse_samples; @@ -388,6 +411,7 @@ void BlenderSync::sync_view_layer(BL::SpaceView3D & /*b_v3d*/, BL::ViewLayer &b_ view_layer.use_background_ao = b_view_layer.use_ao(); view_layer.use_surfaces = b_view_layer.use_solid(); view_layer.use_hair = b_view_layer.use_strand(); + view_layer.use_volumes = b_view_layer.use_volumes(); /* Material override. 
*/ view_layer.material_override = b_view_layer.material_override(); @@ -456,19 +480,16 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass) MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT); MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT); MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT); - MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT); MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT); MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT); MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT); MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT); - MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT); MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT); MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR); MAP_PASS("GlossCol", PASS_GLOSSY_COLOR); MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR); - MAP_PASS("SubsurfaceCol", PASS_SUBSURFACE_COLOR); MAP_PASS("Emit", PASS_EMISSION); MAP_PASS("Env", PASS_BACKGROUND); @@ -482,6 +503,8 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass) MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES); #endif MAP_PASS("Debug Render Time", PASS_RENDER_TIME); + MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER); + MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT); if (string_startswith(name, cryptomatte_prefix)) { return PASS_CRYPTOMATTE; } @@ -517,7 +540,9 @@ int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass) return -1; } -vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer) +vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, + BL::ViewLayer &b_view_layer, + bool adaptive_sampling) { vector<Pass> passes; @@ -551,8 +576,6 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND); MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR); MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND); - MAP_OPTION("denoising_subsurface_direct", DENOISING_CLEAN_SUBSURFACE_DIR); - 
MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND); #undef MAP_OPTION } b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str()); @@ -595,6 +618,10 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str()); Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time"); } + if (get_boolean(crp, "pass_debug_sample_count")) { + b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str()); + Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count"); + } if (get_boolean(crp, "use_pass_volume_direct")) { b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str()); Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir"); @@ -606,12 +633,12 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa /* Cryptomatte stores two ID/weight pairs per RGBA layer. * User facing parameter is the number of pairs. */ - int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2; + int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")); scene->film->cryptomatte_depth = crypto_depth; scene->film->cryptomatte_passes = CRYPT_NONE; if (get_boolean(crp, "use_pass_crypto_object")) { - for (int i = 0; i < crypto_depth; ++i) { - string passname = cryptomatte_prefix + string_printf("Object%02d", i); + for (int i = 0; i < crypto_depth; i += 2) { + string passname = cryptomatte_prefix + string_printf("Object%02d", i / 2); b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); } @@ -619,8 +646,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa CRYPT_OBJECT); } if (get_boolean(crp, "use_pass_crypto_material")) { - for (int i = 0; i < crypto_depth; ++i) { - string passname = cryptomatte_prefix + string_printf("Material%02d", i); + for (int i = 0; i < crypto_depth; i += 2) { + string passname = 
cryptomatte_prefix + string_printf("Material%02d", i / 2); b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); } @@ -628,8 +655,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa CRYPT_MATERIAL); } if (get_boolean(crp, "use_pass_crypto_asset")) { - for (int i = 0; i < crypto_depth; ++i) { - string passname = cryptomatte_prefix + string_printf("Asset%02d", i); + for (int i = 0; i < crypto_depth; i += 2) { + string passname = cryptomatte_prefix + string_printf("Asset%02d", i / 2); b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); } @@ -641,6 +668,13 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa CRYPT_ACCURATE); } + if (adaptive_sampling) { + Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes); + if (!get_boolean(crp, "pass_debug_sample_count")) { + Pass::add(PASS_SAMPLE_COUNT, passes); + } + } + RNA_BEGIN (&crp, b_aov, "aovs") { bool is_color = (get_enum(b_aov, "type") == 1); string name = get_string(b_aov, "name"); @@ -832,6 +866,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, /* other parameters */ params.start_resolution = get_int(cscene, "preview_start_resolution"); + params.denoising_start_sample = get_int(cscene, "preview_denoising_start_sample"); params.pixel_size = b_engine.get_preview_pixel_size(b_scene); /* other parameters */ @@ -841,20 +876,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, /* progressive refine */ BL::RenderSettings b_r = b_scene.render(); - params.progressive_refine = (b_engine.is_preview() || - get_boolean(cscene, "use_progressive_refine")) && - !b_r.use_save_buffers(); - - if (params.progressive_refine) { - BL::Scene::view_layers_iterator b_view_layer; - for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); - 
++b_view_layer) { - PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles"); - if (get_boolean(crl, "use_denoising")) { - params.progressive_refine = false; - } - } - } + params.progressive_refine = b_engine.is_preview() || + get_boolean(cscene, "use_progressive_refine"); + if (b_r.use_save_buffers()) + params.progressive_refine = false; if (background) { if (params.progressive_refine) @@ -889,6 +914,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background && BlenderSession::print_render_stats; + params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling"); + return params; } diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index a80f484fb92..650b4f5bb4e 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -18,11 +18,11 @@ #define __BLENDER_SYNC_H__ #include "MEM_guardedalloc.h" -#include "RNA_types.h" #include "RNA_access.h" #include "RNA_blender_cpp.h" +#include "RNA_types.h" -#include "blender/blender_util.h" +#include "blender/blender_id_map.h" #include "blender/blender_viewport.h" #include "render/scene.h" @@ -40,6 +40,7 @@ class BlenderObjectCulling; class BlenderViewportParameters; class Camera; class Film; +class Hair; class Light; class Mesh; class Object; @@ -70,7 +71,9 @@ class BlenderSync { int height, void **python_thread_state); void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer); - vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer); + vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, + BL::ViewLayer &b_view_layer, + bool adaptive_sampling); void sync_integrator(); void sync_camera(BL::RenderSettings &b_render, BL::Object &b_override, @@ -94,7 +97,8 @@ class BlenderSync { BL::Scene &b_scene, bool background); static bool get_session_pause(BL::Scene &b_scene, 
bool background); - static BufferParams get_buffer_params(BL::RenderSettings &b_render, + static BufferParams get_buffer_params(BL::Scene &b_scene, + BL::RenderSettings &b_render, BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, @@ -118,28 +122,64 @@ class BlenderSync { void **python_thread_state); void sync_film(BL::SpaceView3D &b_v3d); void sync_view(); + + /* Shader */ void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all); void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); - void sync_curve_settings(); - void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree); - Mesh *sync_mesh(BL::Depsgraph &b_depsgrpah, - BL::Object &b_ob, - BL::Object &b_ob_instance, - bool object_updated, - bool show_self, - bool show_particles); - void sync_curves( - Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0); + + /* Object */ Object *sync_object(BL::Depsgraph &b_depsgraph, BL::ViewLayer &b_view_layer, BL::DepsgraphObjectInstance &b_instance, float motion_time, - bool show_self, - bool show_particles, + bool use_particle_hair, bool show_lights, BlenderObjectCulling &culling, bool *use_portal); + + /* Volume */ + void sync_volume(BL::Object &b_ob, Mesh *mesh, const vector<Shader *> &used_shaders); + + /* Mesh */ + void sync_mesh(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Mesh *mesh, + const vector<Shader *> &used_shaders); + void sync_mesh_motion(BL::Depsgraph b_depsgraph, BL::Object b_ob, Mesh *mesh, int motion_step); + + /* Hair */ + void sync_hair(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Geometry *geom, + const vector<Shader *> &used_shaders); + void sync_hair_motion(BL::Depsgraph b_depsgraph, + BL::Object b_ob, + Geometry *geom, + int motion_step); + void sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step = 0); + void sync_particle_hair( + Geometry *geom, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0); + void 
sync_curve_settings(); + bool object_has_particle_hair(BL::Object b_ob); + + /* Camera */ + void sync_camera_motion( + BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time); + + /* Geometry */ + Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah, + BL::Object &b_ob, + BL::Object &b_ob_instance, + bool object_updated, + bool use_particle_hair); + void sync_geometry_motion(BL::Depsgraph &b_depsgraph, + BL::Object &b_ob, + Object *object, + float motion_time, + bool use_particle_hair); + + /* Light */ void sync_light(BL::Object &b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::Object &b_ob, @@ -148,14 +188,8 @@ class BlenderSync { Transform &tfm, bool *use_portal); void sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal); - void sync_mesh_motion(BL::Depsgraph &b_depsgraph, - BL::Object &b_ob, - Object *object, - float motion_time); - void sync_camera_motion( - BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time); - /* particles */ + /* Particles */ bool sync_dupli_particle(BL::Object &b_ob, BL::DepsgraphObjectInstance &b_instance, Object *object); @@ -179,11 +213,11 @@ class BlenderSync { id_map<void *, Shader> shader_map; id_map<ObjectKey, Object> object_map; - id_map<void *, Mesh> mesh_map; + id_map<GeometryKey, Geometry> geometry_map; id_map<ObjectKey, Light> light_map; id_map<ParticleSystemKey, ParticleSystem> particle_system_map; - set<Mesh *> mesh_synced; - set<Mesh *> mesh_motion_synced; + set<Geometry *> geometry_synced; + set<Geometry *> geometry_motion_synced; set<float> motion_times; void *world_map; bool world_recalc; @@ -203,6 +237,7 @@ class BlenderSync { use_background_ao(true), use_surfaces(true), use_hair(true), + use_volumes(true), samples(0), bound_samples(false) { @@ -214,6 +249,7 @@ class BlenderSync { bool use_background_ao; bool use_surfaces; bool use_hair; + bool use_volumes; int samples; bool bound_samples; } view_layer; diff --git 
a/intern/cycles/blender/blender_texture.h b/intern/cycles/blender/blender_texture.h index 896bf62da70..8ab061aaed9 100644 --- a/intern/cycles/blender/blender_texture.h +++ b/intern/cycles/blender/blender_texture.h @@ -17,8 +17,8 @@ #ifndef __BLENDER_TEXTURE_H__ #define __BLENDER_TEXTURE_H__ -#include <stdlib.h> #include "blender/blender_sync.h" +#include <stdlib.h> CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h index bea30a20b8c..ad90a5f8d52 100644 --- a/intern/cycles/blender/blender_util.h +++ b/intern/cycles/blender/blender_util.h @@ -483,7 +483,9 @@ static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &siz } /* Object motion steps, returns 0 if no motion blur needed. */ -static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob) +static inline uint object_motion_steps(BL::Object &b_parent, + BL::Object &b_ob, + const int max_steps = INT_MAX) { /* Get motion enabled and steps from object itself. */ PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); @@ -492,7 +494,7 @@ static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob) return 0; } - uint steps = max(1, get_int(cobject, "motion_steps")); + int steps = max(1, get_int(cobject, "motion_steps")); /* Also check parent object, so motion blur and steps can be * controlled by dupligroup duplicator for linked groups. */ @@ -510,7 +512,7 @@ static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob) /* Use uneven number of steps so we get one keyframe at the current frame, * and use 2^(steps - 1) so objects with more/fewer steps still have samples * at the same times, to avoid sampling at many different times. 
*/ - return (2 << (steps - 1)) + 1; + return min((2 << (steps - 1)) + 1, max_steps); } /* object uses deformation motion blur */ @@ -531,7 +533,7 @@ static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_ return use_deform_motion; } -static inline BL::FluidDomainSettings object_fluid_domain_find(BL::Object &b_ob) +static inline BL::FluidDomainSettings object_fluid_liquid_domain_find(BL::Object &b_ob) { BL::Object::modifiers_iterator b_mod; @@ -539,8 +541,28 @@ static inline BL::FluidDomainSettings object_fluid_domain_find(BL::Object &b_ob) if (b_mod->is_a(&RNA_FluidModifier)) { BL::FluidModifier b_mmd(*b_mod); - if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN) + if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN && + b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_LIQUID) { return b_mmd.domain_settings(); + } + } + } + + return BL::FluidDomainSettings(PointerRNA_NULL); +} + +static inline BL::FluidDomainSettings object_fluid_gas_domain_find(BL::Object &b_ob) +{ + BL::Object::modifiers_iterator b_mod; + + for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) { + if (b_mod->is_a(&RNA_FluidModifier)) { + BL::FluidModifier b_mmd(*b_mod); + + if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN && + b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_GAS) { + return b_mmd.domain_settings(); + } } } @@ -573,209 +595,20 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob, return Mesh::SUBDIVISION_NONE; } -/* ID Map - * - * Utility class to keep in sync with blender data. - * Used for objects, meshes, lights and shaders. 
*/ - -template<typename K, typename T> class id_map { - public: - id_map(vector<T *> *scene_data_) - { - scene_data = scene_data_; - } - - T *find(const BL::ID &id) - { - return find(id.ptr.owner_id); - } - - T *find(const K &key) - { - if (b_map.find(key) != b_map.end()) { - T *data = b_map[key]; - return data; - } - - return NULL; - } - - void set_recalc(const BL::ID &id) - { - b_recalc.insert(id.ptr.data); - } - - void set_recalc(void *id_ptr) - { - b_recalc.insert(id_ptr); - } - - bool has_recalc() - { - return !(b_recalc.empty()); - } - - void pre_sync() - { - used_set.clear(); - } - - bool sync(T **r_data, const BL::ID &id) - { - return sync(r_data, id, id, id.ptr.owner_id); - } - - bool sync(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key) - { - T *data = find(key); - bool recalc; - - if (!data) { - /* add data if it didn't exist yet */ - data = new T(); - scene_data->push_back(data); - b_map[key] = data; - recalc = true; - } - else { - recalc = (b_recalc.find(id.ptr.data) != b_recalc.end()); - if (parent.ptr.data) - recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end()); - } - - used(data); - - *r_data = data; - return recalc; - } - - bool is_used(const K &key) - { - T *data = find(key); - return (data) ? 
used_set.find(data) != used_set.end() : false; - } - - void used(T *data) - { - /* tag data as still in use */ - used_set.insert(data); - } - - void set_default(T *data) - { - b_map[NULL] = data; - } - - bool post_sync(bool do_delete = true) - { - /* remove unused data */ - vector<T *> new_scene_data; - typename vector<T *>::iterator it; - bool deleted = false; - - for (it = scene_data->begin(); it != scene_data->end(); it++) { - T *data = *it; - - if (do_delete && used_set.find(data) == used_set.end()) { - delete data; - deleted = true; - } - else - new_scene_data.push_back(data); - } - - *scene_data = new_scene_data; - - /* update mapping */ - map<K, T *> new_map; - typedef pair<const K, T *> TMapPair; - typename map<K, T *>::iterator jt; - - for (jt = b_map.begin(); jt != b_map.end(); jt++) { - TMapPair &pair = *jt; - - if (used_set.find(pair.second) != used_set.end()) - new_map[pair.first] = pair.second; - } - - used_set.clear(); - b_recalc.clear(); - b_map = new_map; - - return deleted; - } - - const map<K, T *> &key_to_scene_data() - { - return b_map; - } - - protected: - vector<T *> *scene_data; - map<K, T *> b_map; - set<T *> used_set; - set<void *> b_recalc; -}; - -/* Object Key */ - -enum { OBJECT_PERSISTENT_ID_SIZE = 16 }; - -struct ObjectKey { - void *parent; - int id[OBJECT_PERSISTENT_ID_SIZE]; - void *ob; - - ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_) - : parent(parent_), ob(ob_) - { - if (id_) - memcpy(id, id_, sizeof(id)); - else - memset(id, 0, sizeof(id)); - } - - bool operator<(const ObjectKey &k) const - { - if (ob < k.ob) { - return true; - } - else if (ob == k.ob) { - if (parent < k.parent) - return true; - else if (parent == k.parent) - return memcmp(id, k.id, sizeof(id)) < 0; - } - - return false; - } -}; - -/* Particle System Key */ - -struct ParticleSystemKey { - void *ob; - int id[OBJECT_PERSISTENT_ID_SIZE]; - - ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_) - { - if (id_) - 
memcpy(id, id_, sizeof(id)); - else - memset(id, 0, sizeof(id)); - } +static inline uint object_ray_visibility(BL::Object &b_ob) +{ + PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility"); + uint flag = 0; - bool operator<(const ParticleSystemKey &k) const - { - /* first id is particle index, we don't compare that */ - if (ob < k.ob) - return true; - else if (ob == k.ob) - return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0; + flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0; + flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0; + flag |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0; + flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0; + flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0; + flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0; - return false; - } -}; + return flag; +} class EdgeMap { public: diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp index 73ef5f94720..93e84e28032 100644 --- a/intern/cycles/blender/blender_viewport.cpp +++ b/intern/cycles/blender/blender_viewport.cpp @@ -61,6 +61,17 @@ const bool BlenderViewportParameters::custom_viewport_parameters() const return !(use_scene_world && use_scene_lights); } +bool BlenderViewportParameters::get_viewport_display_denoising(BL::SpaceView3D &b_v3d, + BL::Scene &b_scene) +{ + bool use_denoising = false; + if (b_v3d) { + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + use_denoising = get_enum(cscene, "preview_denoising") != 0; + } + return use_denoising; +} + PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d) { PassType display_pass = PASS_NONE; @@ -72,6 +83,11 @@ PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceVi return display_pass; } +bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene 
&b_scene) +{ + return BlenderViewportParameters::get_viewport_display_denoising(b_v3d, b_scene); +} + PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes) { if (b_v3d) { diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h index f26d0d38115..3e44e552f1d 100644 --- a/intern/cycles/blender/blender_viewport.h +++ b/intern/cycles/blender/blender_viewport.h @@ -18,9 +18,9 @@ #define __BLENDER_VIEWPORT_H__ #include "MEM_guardedalloc.h" -#include "RNA_types.h" #include "RNA_access.h" #include "RNA_blender_cpp.h" +#include "RNA_types.h" #include "render/film.h" #include "util/util_param.h" @@ -44,11 +44,15 @@ class BlenderViewportParameters { friend class BlenderSync; public: + /* Get whether to enable denoising data pass in viewport. */ + static bool get_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene); /* Retrieve the render pass that needs to be displayed on the given `SpaceView3D` * When the `b_v3d` parameter is not given `PASS_NONE` will be returned. */ static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d); }; +bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene); + PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes); CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_volume.cpp b/intern/cycles/blender/blender_volume.cpp new file mode 100644 index 00000000000..6254a1a1b24 --- /dev/null +++ b/intern/cycles/blender/blender_volume.cpp @@ -0,0 +1,379 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/colorspace.h" +#include "render/image.h" +#include "render/image_vdb.h" +#include "render/mesh.h" +#include "render/object.h" + +#include "blender/blender_sync.h" +#include "blender/blender_util.h" + +#ifdef WITH_OPENVDB +# include <openvdb/openvdb.h> +openvdb::GridBase::ConstPtr BKE_volume_grid_openvdb_for_read(const struct Volume *volume, + struct VolumeGrid *grid); +#endif + +CCL_NAMESPACE_BEGIN + +/* TODO: verify this is not loading unnecessary attributes. */ +class BlenderSmokeLoader : public ImageLoader { + public: + BlenderSmokeLoader(const BL::Object &b_ob, AttributeStandard attribute) + : b_ob(b_ob), attribute(attribute) + { + } + + bool load_metadata(ImageMetaData &metadata) override + { + BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob); + + if (!b_domain) { + return false; + } + + if (attribute == ATTR_STD_VOLUME_DENSITY || attribute == ATTR_STD_VOLUME_FLAME || + attribute == ATTR_STD_VOLUME_HEAT || attribute == ATTR_STD_VOLUME_TEMPERATURE) { + metadata.type = IMAGE_DATA_TYPE_FLOAT; + metadata.channels = 1; + } + else if (attribute == ATTR_STD_VOLUME_COLOR) { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + metadata.channels = 4; + } + else if (attribute == ATTR_STD_VOLUME_VELOCITY) { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + metadata.channels = 3; + } + else { + return false; + } + + int3 resolution = get_int3(b_domain.domain_resolution()); + int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; + + /* Velocity and heat data is always low-resolution. 
*/ + if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) { + amplify = 1; + } + + metadata.width = resolution.x * amplify; + metadata.height = resolution.y * amplify; + metadata.depth = resolution.z * amplify; + + /* Create a matrix to transform from object space to mesh texture space. + * This does not work with deformations but that can probably only be done + * well with a volume grid mapping of coordinates. */ + BL::Mesh b_mesh(b_ob.data()); + float3 loc, size; + mesh_texture_space(b_mesh, loc, size); + metadata.transform_3d = transform_translate(-loc) * transform_scale(size); + metadata.use_transform_3d = true; + + return true; + } + + bool load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) override + { + /* smoke volume data */ + BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob); + + if (!b_domain) { + return false; + } +#ifdef WITH_FLUID + int3 resolution = get_int3(b_domain.domain_resolution()); + int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; + + /* Velocity and heat data is always low-resolution. 
*/ + if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) { + amplify = 1; + } + + const int width = resolution.x * amplify; + const int height = resolution.y * amplify; + const int depth = resolution.z * amplify; + const size_t num_pixels = ((size_t)width) * height * depth; + + float *fpixels = (float *)pixels; + + if (attribute == ATTR_STD_VOLUME_DENSITY) { + FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_density_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_FLAME) { + /* this is in range 0..1, and interpreted by the OpenGL smoke viewer + * as 1500..3000 K with the first part faded to zero density */ + FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_flame_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_COLOR) { + /* the RGB is "premultiplied" by density for better interpolation results */ + FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels * 4) { + FluidDomainSettings_color_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_VELOCITY) { + FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels * 3) { + FluidDomainSettings_velocity_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_HEAT) { + FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_heat_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_TEMPERATURE) { + FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_temperature_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else { + 
fprintf(stderr, + "Cycles error: unknown volume attribute %s, skipping\n", + Attribute::standard_name(attribute)); + fpixels[0] = 0.0f; + return false; + } +#else + (void)pixels; +#endif + fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n"); + return false; + } + + string name() const override + { + return Attribute::standard_name(attribute); + } + + bool equals(const ImageLoader &other) const override + { + const BlenderSmokeLoader &other_loader = (const BlenderSmokeLoader &)other; + return b_ob == other_loader.b_ob && attribute == other_loader.attribute; + } + + BL::Object b_ob; + AttributeStandard attribute; +}; + +static void sync_smoke_volume(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame) +{ + BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob); + if (!b_domain) { + return; + } + + AttributeStandard attributes[] = {ATTR_STD_VOLUME_DENSITY, + ATTR_STD_VOLUME_COLOR, + ATTR_STD_VOLUME_FLAME, + ATTR_STD_VOLUME_HEAT, + ATTR_STD_VOLUME_TEMPERATURE, + ATTR_STD_VOLUME_VELOCITY, + ATTR_STD_NONE}; + + for (int i = 0; attributes[i] != ATTR_STD_NONE; i++) { + AttributeStandard std = attributes[i]; + if (!mesh->need_attribute(scene, std)) { + continue; + } + + mesh->volume_clipping = b_domain.clipping(); + + Attribute *attr = mesh->attributes.add(std); + + ImageLoader *loader = new BlenderSmokeLoader(b_ob, std); + ImageParams params; + params.frame = frame; + + attr->data_voxel() = scene->image_manager->add_image(loader, params); + } +} + +class BlenderVolumeLoader : public VDBImageLoader { + public: + BlenderVolumeLoader(BL::Volume b_volume, const string &grid_name) + : VDBImageLoader(grid_name), + b_volume(b_volume), + b_volume_grid(PointerRNA_NULL), + unload(false) + { +#ifdef WITH_OPENVDB + /* Find grid with matching name. 
*/ + BL::Volume::grids_iterator b_grid_iter; + for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) { + if (b_grid_iter->name() == grid_name) { + b_volume_grid = *b_grid_iter; + } + } +#endif + } + + bool load_metadata(ImageMetaData &metadata) override + { + if (!b_volume_grid) { + return false; + } + + unload = !b_volume_grid.is_loaded(); + +#ifdef WITH_OPENVDB + Volume *volume = (Volume *)b_volume.ptr.data; + VolumeGrid *volume_grid = (VolumeGrid *)b_volume_grid.ptr.data; + grid = BKE_volume_grid_openvdb_for_read(volume, volume_grid); +#endif + + return VDBImageLoader::load_metadata(metadata); + } + + bool load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixel_size, + const bool associate_alpha) override + { + if (!b_volume_grid) { + return false; + } + + return VDBImageLoader::load_pixels(metadata, pixels, pixel_size, associate_alpha); + } + + bool equals(const ImageLoader &other) const override + { + /* TODO: detect multiple volume datablocks with the same filepath. */ + const BlenderVolumeLoader &other_loader = (const BlenderVolumeLoader &)other; + return b_volume == other_loader.b_volume && b_volume_grid == other_loader.b_volume_grid; + } + + void cleanup() override + { + VDBImageLoader::cleanup(); + if (b_volume_grid && unload) { + b_volume_grid.unload(); + } + } + + BL::Volume b_volume; + BL::VolumeGrid b_volume_grid; + bool unload; +}; + +static void sync_volume_object(BL::BlendData &b_data, BL::Object &b_ob, Scene *scene, Mesh *mesh) +{ + BL::Volume b_volume(b_ob.data()); + b_volume.grids.load(b_data.ptr.data); + + BL::VolumeRender b_render(b_volume.render()); + + mesh->volume_clipping = b_render.clipping(); + mesh->volume_step_size = b_render.step_size(); + mesh->volume_object_space = (b_render.space() == BL::VolumeRender::space_OBJECT); + + /* Find grid with matching name. 
*/ + BL::Volume::grids_iterator b_grid_iter; + for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) { + BL::VolumeGrid b_grid = *b_grid_iter; + ustring name = ustring(b_grid.name()); + AttributeStandard std = ATTR_STD_NONE; + + if (name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) { + std = ATTR_STD_VOLUME_DENSITY; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) { + std = ATTR_STD_VOLUME_COLOR; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) { + std = ATTR_STD_VOLUME_FLAME; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { + std = ATTR_STD_VOLUME_HEAT; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) { + std = ATTR_STD_VOLUME_TEMPERATURE; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) { + std = ATTR_STD_VOLUME_VELOCITY; + } + + if ((std != ATTR_STD_NONE && mesh->need_attribute(scene, std)) || + mesh->need_attribute(scene, name)) { + Attribute *attr = (std != ATTR_STD_NONE) ? + mesh->attributes.add(std) : + mesh->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL); + + ImageLoader *loader = new BlenderVolumeLoader(b_volume, name.string()); + ImageParams params; + params.frame = b_volume.grids.frame(); + + attr->data_voxel() = scene->image_manager->add_image(loader, params); + } + } +} + +/* If the voxel attributes change, we need to rebuild the bounding mesh. 
*/ +static vector<int> get_voxel_image_slots(Mesh *mesh) +{ + vector<int> slots; + for (const Attribute &attr : mesh->attributes.attributes) { + if (attr.element == ATTR_ELEMENT_VOXEL) { + slots.push_back(attr.data_voxel().svm_slot()); + } + } + + return slots; +} + +void BlenderSync::sync_volume(BL::Object &b_ob, Mesh *mesh, const vector<Shader *> &used_shaders) +{ + vector<int> old_voxel_slots = get_voxel_image_slots(mesh); + + mesh->clear(); + mesh->used_shaders = used_shaders; + + if (view_layer.use_volumes) { + if (b_ob.type() == BL::Object::type_VOLUME) { + /* Volume object. Create only attributes, bounding mesh will then + * be automatically generated later. */ + sync_volume_object(b_data, b_ob, scene, mesh); + } + else { + /* Smoke domain. */ + sync_smoke_volume(scene, b_ob, mesh, b_scene.frame_current()); + } + } + + /* Tag update. */ + bool rebuild = (old_voxel_slots != get_voxel_image_slots(mesh)); + mesh->tag_update(scene, rebuild); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 16c721da06a..e6502a40313 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -17,6 +17,7 @@ #include "bvh/bvh.h" +#include "render/hair.h" #include "render/mesh.h" #include "render/object.h" @@ -99,31 +100,33 @@ int BVHStackEntry::encodeIdx() const /* BVH */ -BVH::BVH(const BVHParams ¶ms_, const vector<Mesh *> &meshes_, const vector<Object *> &objects_) - : params(params_), meshes(meshes_), objects(objects_) +BVH::BVH(const BVHParams ¶ms_, + const vector<Geometry *> &geometry_, + const vector<Object *> &objects_) + : params(params_), geometry(geometry_), objects(objects_) { } BVH *BVH::create(const BVHParams ¶ms, - const vector<Mesh *> &meshes, + const vector<Geometry *> &geometry, const vector<Object *> &objects) { switch (params.bvh_layout) { case BVH_LAYOUT_BVH2: - return new BVH2(params, meshes, objects); + return new BVH2(params, geometry, objects); case BVH_LAYOUT_BVH4: - return new BVH4(params, 
meshes, objects); + return new BVH4(params, geometry, objects); case BVH_LAYOUT_BVH8: - return new BVH8(params, meshes, objects); + return new BVH8(params, geometry, objects); case BVH_LAYOUT_EMBREE: #ifdef WITH_EMBREE - return new BVHEmbree(params, meshes, objects); + return new BVHEmbree(params, geometry, objects); #else break; #endif case BVH_LAYOUT_OPTIX: #ifdef WITH_OPTIX - return new BVHOptiX(params, meshes, objects); + return new BVHOptiX(params, geometry, objects); #else break; #endif @@ -217,36 +220,36 @@ void BVH::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility) } else { /* Primitives. */ - const Mesh *mesh = ob->mesh; - if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) { /* Curves. */ - int str_offset = (params.top_level) ? mesh->curve_offset : 0; - Mesh::Curve curve = mesh->get_curve(pidx - str_offset); + const Hair *hair = static_cast<const Hair *>(ob->geometry); + int prim_offset = (params.top_level) ? hair->prim_offset : 0; + Hair::Curve curve = hair->get_curve(pidx - prim_offset); int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]); - curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox); + curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox); visibility |= PATH_RAY_CURVE; /* Motion curves. */ - if (mesh->use_motion_blur) { - Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (hair->use_motion_blur) { + Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr) { - size_t mesh_size = mesh->curve_keys.size(); - size_t steps = mesh->motion_steps - 1; + size_t hair_size = hair->curve_keys.size(); + size_t steps = hair->motion_steps - 1; float3 *key_steps = attr->data_float3(); for (size_t i = 0; i < steps; i++) - curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox); + curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox); } } } else { /* Triangles. 
*/ - int tri_offset = (params.top_level) ? mesh->tri_offset : 0; - Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset); + const Mesh *mesh = static_cast<const Mesh *>(ob->geometry); + int prim_offset = (params.top_level) ? mesh->prim_offset : 0; + Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset); const float3 *vpos = &mesh->verts[0]; triangle.bounds_grow(vpos, bbox); @@ -276,7 +279,7 @@ void BVH::pack_triangle(int idx, float4 tri_verts[3]) { int tob = pack.prim_object[idx]; assert(tob >= 0 && tob < objects.size()); - const Mesh *mesh = objects[tob]->mesh; + const Mesh *mesh = static_cast<const Mesh *>(objects[tob]->geometry); int tidx = pack.prim_index[idx]; Mesh::Triangle t = mesh->get_triangle(tidx); @@ -347,15 +350,13 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8); /* Adjust primitive index to point to the triangle in the global array, for - * meshes with transform applied and already in the top level BVH. + * geometry with transform applied and already in the top level BVH. 
*/ - for (size_t i = 0; i < pack.prim_index.size(); i++) + for (size_t i = 0; i < pack.prim_index.size(); i++) { if (pack.prim_index[i] != -1) { - if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE) - pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset; - else - pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset; + pack.prim_index[i] += objects[pack.prim_object[i]]->geometry->prim_offset; } + } /* track offsets of instanced BVH data in global array */ size_t prim_offset = pack.prim_index.size(); @@ -375,10 +376,10 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) size_t pack_leaf_nodes_offset = leaf_nodes_size; size_t object_offset = 0; - foreach (Mesh *mesh, meshes) { - BVH *bvh = mesh->bvh; + foreach (Geometry *geom, geometry) { + BVH *bvh = geom->bvh; - if (mesh->need_build_bvh(params.bvh_layout)) { + if (geom->need_build_bvh(params.bvh_layout)) { prim_index_size += bvh->pack.prim_index.size(); prim_tri_verts_size += bvh->pack.prim_tri_verts.size(); nodes_size += bvh->pack.nodes.size(); @@ -410,36 +411,35 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL; float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL; - map<Mesh *, int> mesh_map; + map<Geometry *, int> geometry_map; /* merge */ foreach (Object *ob, objects) { - Mesh *mesh = ob->mesh; + Geometry *geom = ob->geometry; /* We assume that if mesh doesn't need own BVH it was already included * into a top-level BVH and no packing here is needed. 
*/ - if (!mesh->need_build_bvh(params.bvh_layout)) { + if (!geom->need_build_bvh(params.bvh_layout)) { pack.object_node[object_offset++] = 0; continue; } /* if mesh already added once, don't add it again, but used set * node offset for this object */ - map<Mesh *, int>::iterator it = mesh_map.find(mesh); + map<Geometry *, int>::iterator it = geometry_map.find(geom); - if (mesh_map.find(mesh) != mesh_map.end()) { + if (geometry_map.find(geom) != geometry_map.end()) { int noffset = it->second; pack.object_node[object_offset++] = noffset; continue; } - BVH *bvh = mesh->bvh; + BVH *bvh = geom->bvh; int noffset = nodes_offset; int noffset_leaf = nodes_leaf_offset; - int mesh_tri_offset = mesh->tri_offset; - int mesh_curve_offset = mesh->curve_offset; + int geom_prim_offset = geom->prim_offset; /* fill in node indexes for instances */ if (bvh->pack.root_index == -1) @@ -447,7 +447,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) else pack.object_node[object_offset++] = noffset; - mesh_map[mesh] = pack.object_node[object_offset - 1]; + geometry_map[geom] = pack.object_node[object_offset - 1]; /* merge primitive, object and triangle indexes */ if (bvh->pack.prim_index.size()) { @@ -460,11 +460,11 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) for (size_t i = 0; i < bvh_prim_index_size; i++) { if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { - pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset; + pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset; pack_prim_tri_index[pack_prim_index_offset] = -1; } else { - pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset; + pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset; pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] + pack_prim_tri_verts_offset; } @@ -535,8 +535,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) /* Modify offsets 
into arrays */ int4 data = bvh_nodes[i + nsize_bbox]; - int4 data1 = bvh_nodes[i + nsize_bbox - 1]; + if (use_obvh) { + int4 data1 = bvh_nodes[i + nsize_bbox - 1]; data.z += (data.z < 0) ? -noffset_leaf : noffset; data.w += (data.w < 0) ? -noffset_leaf : noffset; data.x += (data.x < 0) ? -noffset_leaf : noffset; @@ -545,6 +546,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) data1.w += (data1.w < 0) ? -noffset_leaf : noffset; data1.x += (data1.x < 0) ? -noffset_leaf : noffset; data1.y += (data1.y < 0) ? -noffset_leaf : noffset; + pack_nodes[pack_nodes_offset + nsize_bbox] = data; + pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1; } else { data.z += (data.z < 0) ? -noffset_leaf : noffset; @@ -553,10 +556,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size) data.x += (data.x < 0) ? -noffset_leaf : noffset; data.y += (data.y < 0) ? -noffset_leaf : noffset; } - } - pack_nodes[pack_nodes_offset + nsize_bbox] = data; - if (use_obvh) { - pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1; + pack_nodes[pack_nodes_offset + nsize_bbox] = data; } /* Usually this copies nothing, but we better diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index 92082e4de86..bdde38640c9 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -33,7 +33,7 @@ struct BVHStackEntry; class BVHParams; class BoundBox; class LeafNode; -class Mesh; +class Geometry; class Object; class Progress; @@ -84,11 +84,11 @@ class BVH { public: PackedBVH pack; BVHParams params; - vector<Mesh *> meshes; + vector<Geometry *> geometry; vector<Object *> objects; static BVH *create(const BVHParams ¶ms, - const vector<Mesh *> &meshes, + const vector<Geometry *> &geometry, const vector<Object *> &objects); virtual ~BVH() { @@ -102,7 +102,9 @@ class BVH { void refit(Progress &progress); protected: - BVH(const BVHParams ¶ms, const vector<Mesh *> &meshes, const vector<Object *> &objects); + BVH(const BVHParams ¶ms, + const vector<Geometry *> 
&geometry, + const vector<Object *> &objects); /* Refit range of primitives. */ void refit_primitives(int start, int end, BoundBox &bbox, uint &visibility); diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp index b1a9148c297..c903070429e 100644 --- a/intern/cycles/bvh/bvh2.cpp +++ b/intern/cycles/bvh/bvh2.cpp @@ -26,9 +26,9 @@ CCL_NAMESPACE_BEGIN BVH2::BVH2(const BVHParams ¶ms_, - const vector<Mesh *> &meshes_, + const vector<Geometry *> &geometry_, const vector<Object *> &objects_) - : BVH(params_, meshes_, objects_) + : BVH(params_, geometry_, objects_) { } diff --git a/intern/cycles/bvh/bvh2.h b/intern/cycles/bvh/bvh2.h index a3eaff9cf65..fa3e45b72d2 100644 --- a/intern/cycles/bvh/bvh2.h +++ b/intern/cycles/bvh/bvh2.h @@ -46,7 +46,9 @@ class BVH2 : public BVH { protected: /* constructor */ friend class BVH; - BVH2(const BVHParams ¶ms, const vector<Mesh *> &meshes, const vector<Object *> &objects); + BVH2(const BVHParams ¶ms, + const vector<Geometry *> &geometry, + const vector<Object *> &objects); /* Building process. 
*/ virtual BVHNode *widen_children_nodes(const BVHNode *root) override; diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp index 89b42ee1d21..143c3e54f94 100644 --- a/intern/cycles/bvh/bvh4.cpp +++ b/intern/cycles/bvh/bvh4.cpp @@ -32,9 +32,9 @@ CCL_NAMESPACE_BEGIN */ BVH4::BVH4(const BVHParams ¶ms_, - const vector<Mesh *> &meshes_, + const vector<Geometry *> &geometry_, const vector<Object *> &objects_) - : BVH(params_, meshes_, objects_) + : BVH(params_, geometry_, objects_) { params.bvh_layout = BVH_LAYOUT_BVH4; } diff --git a/intern/cycles/bvh/bvh4.h b/intern/cycles/bvh/bvh4.h index c44f2833c84..afbb9007afb 100644 --- a/intern/cycles/bvh/bvh4.h +++ b/intern/cycles/bvh/bvh4.h @@ -46,7 +46,9 @@ class BVH4 : public BVH { protected: /* constructor */ friend class BVH; - BVH4(const BVHParams ¶ms, const vector<Mesh *> &meshes, const vector<Object *> &objects); + BVH4(const BVHParams ¶ms, + const vector<Geometry *> &geometry, + const vector<Object *> &objects); /* Building process. */ virtual BVHNode *widen_children_nodes(const BVHNode *root) override; diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp index d3516525f78..342dd9e85a5 100644 --- a/intern/cycles/bvh/bvh8.cpp +++ b/intern/cycles/bvh/bvh8.cpp @@ -28,6 +28,7 @@ #include "bvh/bvh8.h" +#include "render/hair.h" #include "render/mesh.h" #include "render/object.h" @@ -37,9 +38,9 @@ CCL_NAMESPACE_BEGIN BVH8::BVH8(const BVHParams ¶ms_, - const vector<Mesh *> &meshes_, + const vector<Geometry *> &geometry_, const vector<Object *> &objects_) - : BVH(params_, meshes_, objects_) + : BVH(params_, geometry_, objects_) { } @@ -429,37 +430,37 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility) } else { /* Primitives. */ - const Mesh *mesh = ob->mesh; - if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) { /* Curves. */ - int str_offset = (params.top_level) ? 
mesh->curve_offset : 0; - Mesh::Curve curve = mesh->get_curve(pidx - str_offset); + const Hair *hair = static_cast<const Hair *>(ob->geometry); + int prim_offset = (params.top_level) ? hair->prim_offset : 0; + Hair::Curve curve = hair->get_curve(pidx - prim_offset); int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]); - curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox); + curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox); visibility |= PATH_RAY_CURVE; /* Motion curves. */ - if (mesh->use_motion_blur) { - Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (hair->use_motion_blur) { + Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr) { - size_t mesh_size = mesh->curve_keys.size(); - size_t steps = mesh->motion_steps - 1; + size_t hair_size = hair->curve_keys.size(); + size_t steps = hair->motion_steps - 1; float3 *key_steps = attr->data_float3(); for (size_t i = 0; i < steps; i++) { - curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox); + curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox); } } } } else { /* Triangles. */ - int tri_offset = (params.top_level) ? mesh->tri_offset : 0; - Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset); + const Mesh *mesh = static_cast<const Mesh *>(ob->geometry); + int prim_offset = (params.top_level) ? 
mesh->prim_offset : 0; + Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset); const float3 *vpos = &mesh->verts[0]; triangle.bounds_grow(vpos, bbox); diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h index 5f26fd423e1..d23fa528e3e 100644 --- a/intern/cycles/bvh/bvh8.h +++ b/intern/cycles/bvh/bvh8.h @@ -57,7 +57,9 @@ class BVH8 : public BVH { protected: /* constructor */ friend class BVH; - BVH8(const BVHParams ¶ms, const vector<Mesh *> &meshes, const vector<Object *> &objects); + BVH8(const BVHParams ¶ms, + const vector<Geometry *> &geometry, + const vector<Object *> &objects); /* Building process. */ virtual BVHNode *widen_children_nodes(const BVHNode *root) override; diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index 1d9b006e8cb..db156219f09 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -22,19 +22,20 @@ #include "bvh/bvh_params.h" #include "bvh_split.h" +#include "render/curves.h" +#include "render/hair.h" #include "render/mesh.h" #include "render/object.h" #include "render/scene.h" -#include "render/curves.h" #include "util/util_algorithm.h" #include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_progress.h" -#include "util/util_stack_allocator.h" +#include "util/util_queue.h" #include "util/util_simd.h" +#include "util/util_stack_allocator.h" #include "util/util_time.h" -#include "util/util_queue.h" CCL_NAMESPACE_BEGIN @@ -194,21 +195,21 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m } } -void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i) +void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i) { const Attribute *curve_attr_mP = NULL; - if (mesh->has_motion_blur()) { - curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (hair->has_motion_blur()) { + curve_attr_mP = 
hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); } - const size_t num_curves = mesh->num_curves(); + const size_t num_curves = hair->num_curves(); for (uint j = 0; j < num_curves; j++) { - const Mesh::Curve curve = mesh->get_curve(j); - const float *curve_radius = &mesh->curve_radius[0]; + const Hair::Curve curve = hair->get_curve(j); + const float *curve_radius = &hair->curve_radius[0]; for (int k = 0; k < curve.num_keys - 1; k++) { if (curve_attr_mP == NULL) { /* Really simple logic for static hair. */ BoundBox bounds = BoundBox::empty; - curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds); + curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds); if (bounds.valid()) { int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k); references.push_back(BVHReference(bounds, j, i, packed_type)); @@ -223,9 +224,9 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Mesh *mesh */ /* TODO(sergey): Support motion steps for spatially split BVH. */ BoundBox bounds = BoundBox::empty; - curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds); - const size_t num_keys = mesh->curve_keys.size(); - const size_t num_steps = mesh->motion_steps; + curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds); + const size_t num_keys = hair->curve_keys.size(); + const size_t num_steps = hair->motion_steps; const float3 *key_steps = curve_attr_mP->data_float3(); for (size_t step = 0; step < num_steps - 1; step++) { curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds); @@ -244,10 +245,10 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Mesh *mesh */ const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1; const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); - const size_t num_steps = mesh->motion_steps; - const float3 *curve_keys = &mesh->curve_keys[0]; + const size_t num_steps = hair->motion_steps; + const float3 *curve_keys = &hair->curve_keys[0]; const float3 *key_steps 
= curve_attr_mP->data_float3(); - const size_t num_keys = mesh->curve_keys.size(); + const size_t num_keys = hair->curve_keys.size(); /* Calculate bounding box of the previous time step. * Will be reused later to avoid duplicated work on * calculating BVH time step boundbox. @@ -302,13 +303,15 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Mesh *mesh } } -void BVHBuild::add_reference_mesh(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i) +void BVHBuild::add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i) { - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); add_reference_triangles(root, center, mesh, i); } - if (params.primitive_mask & PRIMITIVE_ALL_CURVE) { - add_reference_curves(root, center, mesh, i); + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + add_reference_curves(root, center, hair, i); } } @@ -319,16 +322,30 @@ void BVHBuild::add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob center.grow(ob->bounds.center2()); } -static size_t count_curve_segments(Mesh *mesh) +static size_t count_curve_segments(Hair *hair) { - size_t num = 0, num_curves = mesh->num_curves(); + size_t num = 0, num_curves = hair->num_curves(); for (size_t i = 0; i < num_curves; i++) - num += mesh->get_curve(i).num_keys - 1; + num += hair->get_curve(i).num_keys - 1; return num; } +static size_t count_primitives(Geometry *geom) +{ + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + return mesh->num_triangles(); + } + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + return count_curve_segments(hair); + } + + return 0; +} + void BVHBuild::add_references(BVHRange &root) { /* reserve space for references */ @@ -339,24 +356,14 @@ void BVHBuild::add_references(BVHRange &root) if (!ob->is_traceable()) { continue; } - if (!ob->mesh->is_instanced()) { - if 
(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) { - num_alloc_references += ob->mesh->num_triangles(); - } - if (params.primitive_mask & PRIMITIVE_ALL_CURVE) { - num_alloc_references += count_curve_segments(ob->mesh); - } + if (!ob->geometry->is_instanced()) { + num_alloc_references += count_primitives(ob->geometry); } else num_alloc_references++; } else { - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) { - num_alloc_references += ob->mesh->num_triangles(); - } - if (params.primitive_mask & PRIMITIVE_ALL_CURVE) { - num_alloc_references += count_curve_segments(ob->mesh); - } + num_alloc_references += count_primitives(ob->geometry); } } @@ -372,13 +379,13 @@ void BVHBuild::add_references(BVHRange &root) ++i; continue; } - if (!ob->mesh->is_instanced()) - add_reference_mesh(bounds, center, ob->mesh, i); + if (!ob->geometry->is_instanced()) + add_reference_geometry(bounds, center, ob->geometry, i); else add_reference_object(bounds, center, ob, i); } else - add_reference_mesh(bounds, center, ob->mesh, i); + add_reference_geometry(bounds, center, ob->geometry, i); i++; diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h index 9685e26cfac..3fe4c3799e2 100644 --- a/intern/cycles/bvh/bvh_build.h +++ b/intern/cycles/bvh/bvh_build.h @@ -35,6 +35,8 @@ class BVHNode; class BVHSpatialSplitBuildTask; class BVHParams; class InnerNode; +class Geometry; +class Hair; class Mesh; class Object; class Progress; @@ -65,8 +67,8 @@ class BVHBuild { /* Adding references. 
*/ void add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i); - void add_reference_curves(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i); - void add_reference_mesh(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i); + void add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i); + void add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i); void add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i); void add_references(BVHRange &root); diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp index 3e4978a2c0a..9356adf3ea5 100644 --- a/intern/cycles/bvh/bvh_embree.cpp +++ b/intern/cycles/bvh/bvh_embree.cpp @@ -35,9 +35,9 @@ #ifdef WITH_EMBREE +# include <embree3/rtcore_geometry.h> # include <pmmintrin.h> # include <xmmintrin.h> -# include <embree3/rtcore_geometry.h> # include "bvh/bvh_embree.h" @@ -45,10 +45,11 @@ */ # include "kernel/bvh/bvh_embree.h" # include "kernel/kernel_compat_cpu.h" -# include "kernel/split/kernel_split_data_types.h" # include "kernel/kernel_globals.h" # include "kernel/kernel_random.h" +# include "kernel/split/kernel_split_data_types.h" +# include "render/hair.h" # include "render/mesh.h" # include "render/object.h" # include "util/util_foreach.h" @@ -57,6 +58,11 @@ CCL_NAMESPACE_BEGIN +static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT, + "Object and Embree max motion steps inconsistent"); +static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS, + "Object and Geometry max motion steps inconsistent"); + # define IS_HAIR(x) (x & 1) /* This gets called by Embree at every valid ray/object intersection. 
@@ -301,10 +307,24 @@ RTCDevice BVHEmbree::rtc_shared_device = NULL; int BVHEmbree::rtc_shared_users = 0; thread_mutex BVHEmbree::rtc_shared_mutex; +static size_t count_primitives(Geometry *geom) +{ + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + return mesh->num_triangles(); + } + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + return hair->num_segments(); + } + + return 0; +} + BVHEmbree::BVHEmbree(const BVHParams ¶ms_, - const vector<Mesh *> &meshes_, + const vector<Geometry *> &geometry_, const vector<Object *> &objects_) - : BVH(params_, meshes_, objects_), + : BVH(params_, geometry_, objects_), scene(NULL), mem_used(0), top_level(NULL), @@ -325,7 +345,7 @@ BVHEmbree::BVHEmbree(const BVHParams ¶ms_, if (ret != 1) { assert(0); VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag." - "Ray visiblity will not work."; + "Ray visibility will not work."; } ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED); if (ret != 1) { @@ -436,29 +456,15 @@ void BVHEmbree::build(Progress &progress, Stats *stats_) if (!ob->is_traceable()) { continue; } - if (!ob->mesh->is_instanced()) { - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) { - prim_count += ob->mesh->num_triangles(); - } - if (params.primitive_mask & PRIMITIVE_ALL_CURVE) { - for (size_t j = 0; j < ob->mesh->num_curves(); ++j) { - prim_count += ob->mesh->get_curve(j).num_segments(); - } - } + if (!ob->geometry->is_instanced()) { + prim_count += count_primitives(ob->geometry); } else { ++prim_count; } } else { - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) { - prim_count += ob->mesh->num_triangles(); - } - if (params.primitive_mask & PRIMITIVE_ALL_CURVE) { - for (size_t j = 0; j < ob->mesh->num_curves(); ++j) { - prim_count += ob->mesh->get_curve(j).num_segments(); - } - } + prim_count += count_primitives(ob->geometry); } } @@ 
-477,7 +483,7 @@ void BVHEmbree::build(Progress &progress, Stats *stats_) ++i; continue; } - if (!ob->mesh->is_instanced()) { + if (!ob->geometry->is_instanced()) { add_object(ob, i); } else { @@ -528,36 +534,57 @@ BVHNode *BVHEmbree::widen_children_nodes(const BVHNode * /*root*/) void BVHEmbree::add_object(Object *ob, int i) { - Mesh *mesh = ob->mesh; - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) { - add_triangles(ob, i); + Geometry *geom = ob->geometry; + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->num_triangles() > 0) { + add_triangles(ob, mesh, i); + } } - if (params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) { - add_curves(ob, i); + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + if (hair->num_curves() > 0) { + add_curves(ob, hair, i); + } } } void BVHEmbree::add_instance(Object *ob, int i) { - if (!ob || !ob->mesh) { + if (!ob || !ob->geometry) { assert(0); return; } - BVHEmbree *instance_bvh = (BVHEmbree *)(ob->mesh->bvh); + BVHEmbree *instance_bvh = (BVHEmbree *)(ob->geometry->bvh); if (instance_bvh->top_level != this) { instance_bvh->top_level = this; } - const size_t num_motion_steps = ob->use_motion() ? ob->motion.size() : 1; + const size_t num_object_motion_steps = ob->use_motion() ? 
ob->motion.size() : 1; + const size_t num_motion_steps = min(num_object_motion_steps, RTC_MAX_TIME_STEP_COUNT); + assert(num_object_motion_steps <= RTC_MAX_TIME_STEP_COUNT); + RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_INSTANCE); rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene); rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); if (ob->use_motion()) { + array<DecomposedTransform> decomp(ob->motion.size()); + transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size()); for (size_t step = 0; step < num_motion_steps; ++step) { - rtcSetGeometryTransform( - geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->motion[step]); + RTCQuaternionDecomposition rtc_decomp; + rtcInitQuaternionDecomposition(&rtc_decomp); + rtcQuaternionDecompositionSetQuaternion( + &rtc_decomp, decomp[step].x.w, decomp[step].x.x, decomp[step].x.y, decomp[step].x.z); + rtcQuaternionDecompositionSetScale( + &rtc_decomp, decomp[step].y.w, decomp[step].z.w, decomp[step].w.w); + rtcQuaternionDecompositionSetTranslation( + &rtc_decomp, decomp[step].y.x, decomp[step].y.y, decomp[step].y.z); + rtcQuaternionDecompositionSetSkew( + &rtc_decomp, decomp[step].z.x, decomp[step].z.y, decomp[step].w.x); + rtcSetGeometryTransformQuaternion(geom_id, step, &rtc_decomp); } } else { @@ -570,30 +597,28 @@ void BVHEmbree::add_instance(Object *ob, int i) pack.prim_tri_index.push_back_slow(-1); rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene); - rtcSetGeometryMask(geom_id, ob->visibility); + rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); rtcCommitGeometry(geom_id); rtcAttachGeometryByID(scene, geom_id, i * 2); rtcReleaseGeometry(geom_id); } -void BVHEmbree::add_triangles(Object *ob, int i) +void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i) { size_t prim_offset = pack.prim_index.size(); - Mesh *mesh = ob->mesh; const Attribute *attr_mP = NULL; - size_t num_motion_steps = 1; + size_t 
num_geometry_motion_steps = 1; if (mesh->has_motion_blur()) { attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr_mP) { - num_motion_steps = mesh->motion_steps; - if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) { - assert(0); - num_motion_steps = RTC_MAX_TIME_STEP_COUNT; - } + num_geometry_motion_steps = mesh->motion_steps; } } + const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT); + assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT); + const size_t num_triangles = mesh->num_triangles(); RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, RTC_GEOMETRY_TYPE_TRIANGLE); rtcSetGeometryBuildQuality(geom_id, build_quality); @@ -635,7 +660,7 @@ void BVHEmbree::add_triangles(Object *ob, int i) rtcSetGeometryUserData(geom_id, (void *)prim_offset); rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func); rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); - rtcSetGeometryMask(geom_id, ob->visibility); + rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); rtcCommitGeometry(geom_id); rtcAttachGeometryByID(scene, geom_id, i * 2); @@ -684,31 +709,37 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh) } } -void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh) +void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair) { const Attribute *attr_mP = NULL; size_t num_motion_steps = 1; - if (mesh->has_motion_blur()) { - attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (hair->has_motion_blur()) { + attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr_mP) { - num_motion_steps = mesh->motion_steps; + num_motion_steps = hair->motion_steps; } } - const size_t num_curves = mesh->num_curves(); + const size_t num_curves = hair->num_curves(); size_t num_keys = 0; for (size_t j = 0; j < num_curves; ++j) { - const Mesh::Curve c = mesh->get_curve(j); + const 
Hair::Curve c = hair->get_curve(j); num_keys += c.num_keys; } + /* Catmull-Rom splines need extra CVs at the beginning and end of each curve. */ + size_t num_keys_embree = num_keys; + if (use_curves) { + num_keys_embree += num_curves * 2; + } + /* Copy the CV data to Embree */ const int t_mid = (num_motion_steps - 1) / 2; - const float *curve_radius = &mesh->curve_radius[0]; + const float *curve_radius = &hair->curve_radius[0]; for (int t = 0; t < num_motion_steps; ++t) { const float3 *verts; if (t == t_mid || attr_mP == NULL) { - verts = &mesh->curve_keys[0]; + verts = &hair->curve_keys[0]; } else { int t_ = (t > t_mid) ? (t - 1) : t; @@ -716,42 +747,28 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh } float4 *rtc_verts = (float4 *)rtcSetNewGeometryBuffer( - geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys); - float4 *rtc_tangents = NULL; - if (use_curves) { - rtc_tangents = (float4 *)rtcSetNewGeometryBuffer( - geom_id, RTC_BUFFER_TYPE_TANGENT, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys); - assert(rtc_tangents); - } + geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys_embree); + assert(rtc_verts); if (rtc_verts) { - if (use_curves && rtc_tangents) { - const size_t num_curves = mesh->num_curves(); + if (use_curves) { + const size_t num_curves = hair->num_curves(); for (size_t j = 0; j < num_curves; ++j) { - Mesh::Curve c = mesh->get_curve(j); + Hair::Curve c = hair->get_curve(j); int fk = c.first_key; - rtc_verts[0] = float3_to_float4(verts[fk]); - rtc_verts[0].w = curve_radius[fk]; - rtc_tangents[0] = float3_to_float4(verts[fk + 1] - verts[fk]); - rtc_tangents[0].w = curve_radius[fk + 1] - curve_radius[fk]; - ++fk; int k = 1; - for (; k < c.num_segments(); ++k, ++fk) { + for (; k < c.num_keys + 1; ++k, ++fk) { rtc_verts[k] = float3_to_float4(verts[fk]); rtc_verts[k].w = curve_radius[fk]; - rtc_tangents[k] = float3_to_float4((verts[fk + 1] - verts[fk - 1]) * 
0.5f); - rtc_tangents[k].w = (curve_radius[fk + 1] - curve_radius[fk - 1]) * 0.5f; } - rtc_verts[k] = float3_to_float4(verts[fk]); - rtc_verts[k].w = curve_radius[fk]; - rtc_tangents[k] = float3_to_float4(verts[fk] - verts[fk - 1]); - rtc_tangents[k].w = curve_radius[fk] - curve_radius[fk - 1]; - rtc_verts += c.num_keys; - rtc_tangents += c.num_keys; + /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */ + rtc_verts[0] = rtc_verts[1]; + rtc_verts[k] = rtc_verts[k - 1]; + rtc_verts += c.num_keys + 2; } } else { - for (size_t j = 0; j < num_keys; ++j) { + for (size_t j = 0; j < num_keys_embree; ++j) { rtc_verts[j] = float3_to_float4(verts[j]); rtc_verts[j].w = curve_radius[j]; } @@ -760,23 +777,25 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh } } -void BVHEmbree::add_curves(Object *ob, int i) +void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i) { size_t prim_offset = pack.prim_index.size(); - const Mesh *mesh = ob->mesh; const Attribute *attr_mP = NULL; - size_t num_motion_steps = 1; - if (mesh->has_motion_blur()) { - attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + size_t num_geometry_motion_steps = 1; + if (hair->has_motion_blur()) { + attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr_mP) { - num_motion_steps = mesh->motion_steps; + num_geometry_motion_steps = hair->motion_steps; } } - const size_t num_curves = mesh->num_curves(); + const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT); + assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT); + + const size_t num_curves = hair->num_curves(); size_t num_segments = 0; for (size_t j = 0; j < num_curves; ++j) { - Mesh::Curve c = mesh->get_curve(j); + Hair::Curve c = hair->get_curve(j); assert(c.num_segments() > 0); num_segments += c.num_segments(); } @@ -793,8 +812,8 @@ void BVHEmbree::add_curves(Object *ob, int i) enum RTCGeometryType type = 
(!use_curves) ? RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE : - (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE : - RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE); + (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE : + RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE); RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type); rtcSetGeometryTessellationRate(geom_id, curve_subdivisions); @@ -802,9 +821,13 @@ void BVHEmbree::add_curves(Object *ob, int i) geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments); size_t rtc_index = 0; for (size_t j = 0; j < num_curves; ++j) { - Mesh::Curve c = mesh->get_curve(j); + Hair::Curve c = hair->get_curve(j); for (size_t k = 0; k < c.num_segments(); ++k) { rtc_indices[rtc_index] = c.first_key + k; + if (use_curves) { + /* Room for extra CVs at Catmull-Rom splines. */ + rtc_indices[rtc_index] += j * 2; + } /* Cycles specific data. */ pack.prim_object[prim_object_size + rtc_index] = i; pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT( @@ -819,12 +842,12 @@ void BVHEmbree::add_curves(Object *ob, int i) rtcSetGeometryBuildQuality(geom_id, build_quality); rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); - update_curve_vertex_buffer(geom_id, mesh); + update_curve_vertex_buffer(geom_id, hair); rtcSetGeometryUserData(geom_id, (void *)prim_offset); rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func); rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); - rtcSetGeometryMask(geom_id, ob->visibility); + rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); rtcCommitGeometry(geom_id); rtcAttachGeometryByID(scene, geom_id, i * 2 + 1); @@ -840,10 +863,7 @@ void BVHEmbree::pack_nodes(const BVHNode *) for (size_t i = 0; i < pack.prim_index.size(); ++i) { if (pack.prim_index[i] != -1) { - if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE) - pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset; - else - pack.prim_index[i] += 
objects[pack.prim_object[i]]->mesh->tri_offset; + pack.prim_index[i] += objects[pack.prim_object[i]]->geometry->prim_offset; } } @@ -857,22 +877,22 @@ void BVHEmbree::pack_nodes(const BVHNode *) size_t pack_prim_tri_verts_offset = prim_tri_verts_size; size_t object_offset = 0; - map<Mesh *, int> mesh_map; + map<Geometry *, int> geometry_map; foreach (Object *ob, objects) { - Mesh *mesh = ob->mesh; - BVH *bvh = mesh->bvh; + Geometry *geom = ob->geometry; + BVH *bvh = geom->bvh; - if (mesh->need_build_bvh(BVH_LAYOUT_EMBREE)) { - if (mesh_map.find(mesh) == mesh_map.end()) { + if (geom->need_build_bvh(BVH_LAYOUT_EMBREE)) { + if (geometry_map.find(geom) == geometry_map.end()) { prim_index_size += bvh->pack.prim_index.size(); prim_tri_verts_size += bvh->pack.prim_tri_verts.size(); - mesh_map[mesh] = 1; + geometry_map[geom] = 1; } } } - mesh_map.clear(); + geometry_map.clear(); pack.prim_index.resize(prim_index_size); pack.prim_type.resize(prim_index_size); @@ -890,38 +910,37 @@ void BVHEmbree::pack_nodes(const BVHNode *) /* merge */ foreach (Object *ob, objects) { - Mesh *mesh = ob->mesh; + Geometry *geom = ob->geometry; /* We assume that if mesh doesn't need own BVH it was already included * into a top-level BVH and no packing here is needed. 
*/ - if (!mesh->need_build_bvh(BVH_LAYOUT_EMBREE)) { + if (!geom->need_build_bvh(BVH_LAYOUT_EMBREE)) { pack.object_node[object_offset++] = prim_offset; continue; } - /* if mesh already added once, don't add it again, but used set + /* if geom already added once, don't add it again, but used set * node offset for this object */ - map<Mesh *, int>::iterator it = mesh_map.find(mesh); + map<Geometry *, int>::iterator it = geometry_map.find(geom); - if (mesh_map.find(mesh) != mesh_map.end()) { + if (geometry_map.find(geom) != geometry_map.end()) { int noffset = it->second; pack.object_node[object_offset++] = noffset; continue; } - BVHEmbree *bvh = (BVHEmbree *)mesh->bvh; + BVHEmbree *bvh = (BVHEmbree *)geom->bvh; rtc_memory_monitor_func(stats, unaccounted_mem, true); unaccounted_mem = 0; - int mesh_tri_offset = mesh->tri_offset; - int mesh_curve_offset = mesh->curve_offset; + int geom_prim_offset = geom->prim_offset; /* fill in node indexes for instances */ pack.object_node[object_offset++] = prim_offset; - mesh_map[mesh] = pack.object_node[object_offset - 1]; + geometry_map[geom] = pack.object_node[object_offset - 1]; /* merge primitive, object and triangle indexes */ if (bvh->pack.prim_index.size()) { @@ -932,11 +951,11 @@ void BVHEmbree::pack_nodes(const BVHNode *) for (size_t i = 0; i < bvh_prim_index_size; ++i) { if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { - pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset; + pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset; pack_prim_tri_index[pack_prim_index_offset] = -1; } else { - pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset; + pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset; pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] + pack_prim_tri_verts_offset; } @@ -966,15 +985,22 @@ void BVHEmbree::refit_nodes() /* Update all vertex buffers, then tell Embree to rebuild/-fit the BVHs. 
*/ unsigned geom_id = 0; foreach (Object *ob, objects) { - if (!params.top_level || (ob->is_traceable() && !ob->mesh->is_instanced())) { - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) { - update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), ob->mesh); - rtcCommitGeometry(rtcGetGeometry(scene, geom_id)); + if (!params.top_level || (ob->is_traceable() && !ob->geometry->is_instanced())) { + Geometry *geom = ob->geometry; + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->num_triangles() > 0) { + update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), mesh); + rtcCommitGeometry(rtcGetGeometry(scene, geom_id)); + } } - - if (params.primitive_mask & PRIMITIVE_ALL_CURVE && ob->mesh->num_curves() > 0) { - update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), ob->mesh); - rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1)); + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + if (hair->num_curves() > 0) { + update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), hair); + rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1)); + } } } geom_id += 2; diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h index 123e87dd9b0..eb121d060b7 100644 --- a/intern/cycles/bvh/bvh_embree.h +++ b/intern/cycles/bvh/bvh_embree.h @@ -31,6 +31,8 @@ CCL_NAMESPACE_BEGIN +class Geometry; +class Hair; class Mesh; class BVHEmbree : public BVH { @@ -47,7 +49,7 @@ class BVHEmbree : public BVH { protected: friend class BVH; BVHEmbree(const BVHParams ¶ms, - const vector<Mesh *> &meshes, + const vector<Geometry *> &geometry, const vector<Object *> &objects); virtual void pack_nodes(const BVHNode *) override; @@ -55,8 +57,8 @@ class BVHEmbree : public BVH { void add_object(Object *ob, int i); void add_instance(Object *ob, int i); - void add_curves(Object *ob, int i); - void add_triangles(Object *ob, int i); + void add_curves(const Object *ob, 
const Hair *hair, int i); + void add_triangles(const Object *ob, const Mesh *mesh, int i); ssize_t mem_used; @@ -69,7 +71,7 @@ class BVHEmbree : public BVH { private: void delete_rtcScene(); void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh); - void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh); + void update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair); static RTCDevice rtc_shared_device; static int rtc_shared_users; diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp index 86d755ab06a..26b64c24db5 100644 --- a/intern/cycles/bvh/bvh_optix.cpp +++ b/intern/cycles/bvh/bvh_optix.cpp @@ -18,17 +18,20 @@ #ifdef WITH_OPTIX # include "bvh/bvh_optix.h" +# include "render/geometry.h" +# include "render/hair.h" # include "render/mesh.h" # include "render/object.h" +# include "util/util_foreach.h" # include "util/util_logging.h" # include "util/util_progress.h" CCL_NAMESPACE_BEGIN BVHOptiX::BVHOptiX(const BVHParams ¶ms_, - const vector<Mesh *> &meshes_, + const vector<Geometry *> &geometry_, const vector<Object *> &objects_) - : BVH(params_, meshes_, objects_) + : BVH(params_, geometry_, objects_) { } @@ -56,47 +59,52 @@ void BVHOptiX::copy_to_device(Progress &progress, DeviceScene *dscene) void BVHOptiX::pack_blas() { // Bottom-level BVH can contain multiple primitive types, so merge them: - assert(meshes.size() == 1 && objects.size() == 1); // These are build per-mesh - Mesh *const mesh = meshes[0]; - - if (params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) { - const size_t num_curves = mesh->num_curves(); - const size_t num_segments = mesh->num_segments(); - pack.prim_type.reserve(pack.prim_type.size() + num_segments); - pack.prim_index.reserve(pack.prim_index.size() + num_segments); - pack.prim_object.reserve(pack.prim_object.size() + num_segments); - // 'pack.prim_time' is only used in geom_curve_intersect.h - // It is not needed because of 
OPTIX_MOTION_FLAG_[START|END]_VANISH - - uint type = PRIMITIVE_CURVE; - if (mesh->use_motion_blur && mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) - type = PRIMITIVE_MOTION_CURVE; - - for (size_t j = 0; j < num_curves; ++j) { - const Mesh::Curve curve = mesh->get_curve(j); - for (size_t k = 0; k < curve.num_segments(); ++k) { - pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(type, k)); - // Each curve segment points back to its curve index - pack.prim_index.push_back_reserved(j); - pack.prim_object.push_back_reserved(0); + assert(geometry.size() == 1 && objects.size() == 1); // These are built per-mesh + Geometry *const geom = geometry[0]; + + if (geom->type == Geometry::HAIR) { + Hair *const hair = static_cast<Hair *const>(geom); + if (hair->num_curves() > 0) { + const size_t num_curves = hair->num_curves(); + const size_t num_segments = hair->num_segments(); + pack.prim_type.reserve(pack.prim_type.size() + num_segments); + pack.prim_index.reserve(pack.prim_index.size() + num_segments); + pack.prim_object.reserve(pack.prim_object.size() + num_segments); + // 'pack.prim_time' is only used in geom_curve_intersect.h + // It is not needed because of OPTIX_MOTION_FLAG_[START|END]_VANISH + + uint type = PRIMITIVE_CURVE; + if (hair->use_motion_blur && hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) + type = PRIMITIVE_MOTION_CURVE; + + for (size_t j = 0; j < num_curves; ++j) { + const Hair::Curve curve = hair->get_curve(j); + for (size_t k = 0; k < curve.num_segments(); ++k) { + pack.prim_type.push_back_reserved(PRIMITIVE_PACK_SEGMENT(type, k)); + // Each curve segment points back to its curve index + pack.prim_index.push_back_reserved(j); + pack.prim_object.push_back_reserved(0); + } } } } - - if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) { - const size_t num_triangles = mesh->num_triangles(); - pack.prim_type.reserve(pack.prim_type.size() + num_triangles); - 
pack.prim_index.reserve(pack.prim_index.size() + num_triangles); - pack.prim_object.reserve(pack.prim_object.size() + num_triangles); - - uint type = PRIMITIVE_TRIANGLE; - if (mesh->use_motion_blur && mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) - type = PRIMITIVE_MOTION_TRIANGLE; - - for (size_t k = 0; k < num_triangles; ++k) { - pack.prim_type.push_back_reserved(type); - pack.prim_index.push_back_reserved(k); - pack.prim_object.push_back_reserved(0); + else if (geom->type == Geometry::MESH) { + Mesh *const mesh = static_cast<Mesh *const>(geom); + if (mesh->num_triangles() > 0) { + const size_t num_triangles = mesh->num_triangles(); + pack.prim_type.reserve(pack.prim_type.size() + num_triangles); + pack.prim_index.reserve(pack.prim_index.size() + num_triangles); + pack.prim_object.reserve(pack.prim_object.size() + num_triangles); + + uint type = PRIMITIVE_TRIANGLE; + if (mesh->use_motion_blur && mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) + type = PRIMITIVE_MOTION_TRIANGLE; + + for (size_t k = 0; k < num_triangles; ++k) { + pack.prim_type.push_back_reserved(type); + pack.prim_index.push_back_reserved(k); + pack.prim_object.push_back_reserved(0); + } } } @@ -116,8 +124,8 @@ void BVHOptiX::pack_tlas() // Calculate total packed size size_t prim_index_size = 0; size_t prim_tri_verts_size = 0; - foreach (Mesh *mesh, meshes) { - BVH *const bvh = mesh->bvh; + foreach (Geometry *geom, geometry) { + BVH *const bvh = geom->bvh; prim_index_size += bvh->pack.prim_index.size(); prim_tri_verts_size += bvh->pack.prim_tri_verts.size(); } @@ -141,13 +149,12 @@ void BVHOptiX::pack_tlas() pack.prim_tri_verts.resize(prim_tri_verts_size); float4 *pack_prim_tri_verts = pack.prim_tri_verts.data(); - // Top-level BVH should only contain instances, see 'Mesh::need_build_bvh' + // Top-level BVH should only contain instances, see 'Geometry::need_build_bvh' // Iterate over scene mesh list instead of objects, since the 'prim_offset' is calculated based // on that list, 
which may be ordered differently from the object list. - foreach (Mesh *mesh, meshes) { - PackedBVH &bvh_pack = mesh->bvh->pack; - int mesh_tri_offset = mesh->tri_offset; - int mesh_curve_offset = mesh->curve_offset; + foreach (Geometry *geom, geometry) { + PackedBVH &bvh_pack = geom->bvh->pack; + int geom_prim_offset = geom->prim_offset; // Merge primitive, object and triangle indexes if (!bvh_pack.prim_index.empty()) { @@ -158,16 +165,16 @@ void BVHOptiX::pack_tlas() for (size_t i = 0; i < bvh_pack.prim_index.size(); i++, pack_offset++) { if (bvh_pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { - pack_prim_index[pack_offset] = bvh_prim_index[i] + mesh_curve_offset; + pack_prim_index[pack_offset] = bvh_prim_index[i] + geom_prim_offset; pack_prim_tri_index[pack_offset] = -1; } else { - pack_prim_index[pack_offset] = bvh_prim_index[i] + mesh_tri_offset; + pack_prim_index[pack_offset] = bvh_prim_index[i] + geom_prim_offset; pack_prim_tri_index[pack_offset] = bvh_prim_tri_index[i] + pack_verts_offset; } pack_prim_type[pack_offset] = bvh_prim_type[i]; - pack_prim_object[pack_offset] = 0; // Unused for instanced meshes + pack_prim_object[pack_offset] = 0; // Unused for instanced geometry pack_prim_visibility[pack_offset] = bvh_prim_visibility[i]; } } @@ -182,15 +189,24 @@ void BVHOptiX::pack_tlas() } } - // Merge visibility flags of all objects and fix object indices for non-instanced meshes + // Merge visibility flags of all objects and fix object indices for non-instanced geometry foreach (Object *ob, objects) { - Mesh *const mesh = ob->mesh; - for (size_t i = 0; i < mesh->num_primitives(); ++i) { - if (!ob->mesh->is_instanced()) { - assert(pack.prim_object[mesh->prim_offset + i] == 0); - pack.prim_object[mesh->prim_offset + i] = ob->get_device_index(); + Geometry *const geom = ob->geometry; + size_t num_primitives = 0; + + if (geom->type == Geometry::MESH) { + num_primitives = static_cast<Mesh *const>(geom)->num_triangles(); + } + else if (geom->type == Geometry::HAIR) { + 
num_primitives = static_cast<Hair *const>(geom)->num_segments(); + } + + for (size_t i = 0; i < num_primitives; ++i) { + if (!geom->is_instanced()) { + assert(pack.prim_object[geom->optix_prim_offset + i] == 0); + pack.prim_object[geom->optix_prim_offset + i] = ob->get_device_index(); } - pack.prim_visibility[mesh->prim_offset + i] |= ob->visibility_for_tracing(); + pack.prim_visibility[geom->optix_prim_offset + i] |= ob->visibility_for_tracing(); } } } diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h index 35033fe635f..e4745b093b5 100644 --- a/intern/cycles/bvh/bvh_optix.h +++ b/intern/cycles/bvh/bvh_optix.h @@ -26,11 +26,16 @@ CCL_NAMESPACE_BEGIN +class Geometry; +class Optix; + class BVHOptiX : public BVH { friend class BVH; public: - BVHOptiX(const BVHParams ¶ms, const vector<Mesh *> &meshes, const vector<Object *> &objects); + BVHOptiX(const BVHParams ¶ms, + const vector<Geometry *> &geometry, + const vector<Object *> &objects); virtual ~BVHOptiX(); virtual void build(Progress &progress, Stats *) override; diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h index 2731662a39d..5e2c4b63f1b 100644 --- a/intern/cycles/bvh/bvh_params.h +++ b/intern/cycles/bvh/bvh_params.h @@ -69,9 +69,6 @@ class BVHParams { /* BVH layout to be built. */ BVHLayout bvh_layout; - /* Mask of primitives to be included into the BVH. */ - int primitive_mask; - /* Use unaligned bounding boxes. * Only used for curves BVH. 
*/ @@ -120,8 +117,6 @@ class BVHParams { bvh_layout = BVH_LAYOUT_BVH2; use_unaligned_nodes = false; - primitive_mask = PRIMITIVE_ALL; - num_motion_curve_steps = 0; num_motion_triangle_steps = 0; diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp index bd261c10d55..acdca0f13ad 100644 --- a/intern/cycles/bvh/bvh_split.cpp +++ b/intern/cycles/bvh/bvh_split.cpp @@ -20,6 +20,7 @@ #include "bvh/bvh_build.h" #include "bvh/bvh_sort.h" +#include "render/hair.h" #include "render/mesh.h" #include "render/object.h" @@ -378,7 +379,7 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh, } } -void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh, +void BVHSpatialSplit::split_curve_primitive(const Hair *hair, const Transform *tfm, int prim_index, int segment_index, @@ -388,11 +389,11 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh, BoundBox &right_bounds) { /* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/ - Mesh::Curve curve = mesh->get_curve(prim_index); + Hair::Curve curve = hair->get_curve(prim_index); const int k0 = curve.first_key + segment_index; const int k1 = k0 + 1; - float3 v0 = mesh->curve_keys[k0]; - float3 v1 = mesh->curve_keys[k1]; + float3 v0 = hair->curve_keys[k0]; + float3 v1 = hair->curve_keys[k1]; if (tfm != NULL) { v0 = transform_point(tfm, v0); @@ -436,13 +437,13 @@ void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref, } void BVHSpatialSplit::split_curve_reference(const BVHReference &ref, - const Mesh *mesh, + const Hair *hair, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds) { - split_curve_primitive(mesh, + split_curve_primitive(hair, NULL, ref.prim_index(), PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()), @@ -455,15 +456,22 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference &ref, void BVHSpatialSplit::split_object_reference( const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds) { - Mesh 
*mesh = object->mesh; - for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) { - split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds); + Geometry *geom = object->geometry; + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) { + split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds); + } } - for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) { - Mesh::Curve curve = mesh->get_curve(curve_idx); - for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) { - split_curve_primitive( - mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds); + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + for (int curve_idx = 0; curve_idx < hair->num_curves(); ++curve_idx) { + Hair::Curve curve = hair->get_curve(curve_idx); + for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) { + split_curve_primitive( + hair, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds); + } } } } @@ -481,13 +489,14 @@ void BVHSpatialSplit::split_reference(const BVHBuild &builder, /* loop over vertices/edges. 
*/ const Object *ob = builder.objects[ref.prim_object()]; - const Mesh *mesh = ob->mesh; if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { + Mesh *mesh = static_cast<Mesh *>(ob->geometry); split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds); } else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) { - split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds); + Hair *hair = static_cast<Hair *>(ob->geometry); + split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds); } else { split_object_reference(ob, dim, pos, left_bounds, right_bounds); diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h index eddd1c27f49..5f2e41cf343 100644 --- a/intern/cycles/bvh/bvh_split.h +++ b/intern/cycles/bvh/bvh_split.h @@ -24,6 +24,8 @@ CCL_NAMESPACE_BEGIN class BVHBuild; +class Hair; +class Mesh; struct Transform; /* Object Split */ @@ -113,7 +115,7 @@ class BVHSpatialSplit { float pos, BoundBox &left_bounds, BoundBox &right_bounds); - void split_curve_primitive(const Mesh *mesh, + void split_curve_primitive(const Hair *hair, const Transform *tfm, int prim_index, int segment_index, @@ -134,7 +136,7 @@ class BVHSpatialSplit { BoundBox &left_bounds, BoundBox &right_bounds); void split_curve_reference(const BVHReference &ref, - const Mesh *mesh, + const Hair *hair, int dim, float pos, BoundBox &left_bounds, diff --git a/intern/cycles/bvh/bvh_unaligned.cpp b/intern/cycles/bvh/bvh_unaligned.cpp index 1843ca403a5..f0995f343fe 100644 --- a/intern/cycles/bvh/bvh_unaligned.cpp +++ b/intern/cycles/bvh/bvh_unaligned.cpp @@ -16,7 +16,7 @@ #include "bvh/bvh_unaligned.h" -#include "render/mesh.h" +#include "render/hair.h" #include "render/object.h" #include "bvh/bvh_binning.h" @@ -71,10 +71,10 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali if (type & PRIMITIVE_CURVE) { const int curve_index = ref.prim_index(); const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); - const Mesh *mesh = object->mesh; 
- const Mesh::Curve &curve = mesh->get_curve(curve_index); + const Hair *hair = static_cast<const Hair *>(object->geometry); + const Hair::Curve &curve = hair->get_curve(curve_index); const int key = curve.first_key + segment; - const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1]; + const float3 v1 = hair->curve_keys[key], v2 = hair->curve_keys[key + 1]; float length; const float3 axis = normalize_len(v2 - v1, &length); if (length > 1e-6f) { @@ -96,10 +96,10 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim, if (type & PRIMITIVE_CURVE) { const int curve_index = prim.prim_index(); const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); - const Mesh *mesh = object->mesh; - const Mesh::Curve &curve = mesh->get_curve(curve_index); + const Hair *hair = static_cast<const Hair *>(object->geometry); + const Hair::Curve &curve = hair->get_curve(curve_index); curve.bounds_grow( - segment, &mesh->curve_keys[0], &mesh->curve_radius[0], aligned_space, bounds); + segment, &hair->curve_keys[0], &hair->curve_radius[0], aligned_space, bounds); } else { bounds = prim.bounds().transformed(&aligned_space); diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index 5bf681792ca..0b082b11cf7 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -135,7 +135,7 @@ if(CYCLES_STANDALONE_REPOSITORY) #### # embree if(WITH_CYCLES_EMBREE) - find_package(embree 3.2.4 REQUIRED) + find_package(embree 3.8.0 REQUIRED) endif() #### diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 35a79356957..aa5b65a2b73 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -34,13 +34,17 @@ set(SRC device_task.cpp ) +set(SRC_CUDA + cuda/device_cuda.h + cuda/device_cuda_impl.cpp +) + set(SRC_OPENCL - opencl/opencl.h + opencl/device_opencl.h + opencl/device_opencl_impl.cpp opencl/memory_manager.h 
- - opencl/opencl_split.cpp - opencl/opencl_util.cpp opencl/memory_manager.cpp + opencl/opencl_util.cpp ) if(WITH_CYCLES_NETWORK) @@ -98,4 +102,4 @@ endif() include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_OPENCL} ${SRC_HEADERS}) +cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_CUDA} ${SRC_OPENCL} ${SRC_HEADERS}) diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h new file mode 100644 index 00000000000..3e397da895b --- /dev/null +++ b/intern/cycles/device/cuda/device_cuda.h @@ -0,0 +1,269 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef WITH_CUDA + +# include "device/device.h" +# include "device/device_denoising.h" +# include "device/device_split_kernel.h" + +# include "util/util_map.h" + +# ifdef WITH_CUDA_DYNLOAD +# include "cuew.h" +# else +# include "util/util_opengl.h" +# include <cuda.h> +# include <cudaGL.h> +# endif + +CCL_NAMESPACE_BEGIN + +class CUDASplitKernel; + +class CUDADevice : public Device { + + friend class CUDASplitKernelFunction; + friend class CUDASplitKernel; + friend class CUDAContextScope; + + public: + DedicatedTaskPool task_pool; + CUdevice cuDevice; + CUcontext cuContext; + CUmodule cuModule, cuFilterModule; + size_t device_texture_headroom; + size_t device_working_headroom; + bool move_texture_to_host; + size_t map_host_used; + size_t map_host_limit; + int can_map_host; + int cuDevId; + int cuDevArchitecture; + bool first_error; + CUDASplitKernel *split_kernel; + + struct CUDAMem { + CUDAMem() : texobject(0), array(0), use_mapped_host(false) + { + } + + CUtexObject texobject; + CUarray array; + + /* If true, a mapped host memory in shared_pointer is being used. 
*/ + bool use_mapped_host; + }; + typedef map<device_memory *, CUDAMem> CUDAMemMap; + CUDAMemMap cuda_mem_map; + + struct PixelMem { + GLuint cuPBO; + CUgraphicsResource cuPBOresource; + GLuint cuTexId; + int w, h; + }; + map<device_ptr, PixelMem> pixel_mem_map; + + /* Bindless Textures */ + device_vector<TextureInfo> texture_info; + bool need_texture_info; + + /* Kernels */ + struct { + bool loaded; + + CUfunction adaptive_stopping; + CUfunction adaptive_filter_x; + CUfunction adaptive_filter_y; + CUfunction adaptive_scale_samples; + int adaptive_num_threads_per_block; + } functions; + + static bool have_precompiled_kernels(); + + virtual bool show_samples() const; + + virtual BVHLayoutMask get_bvh_layout_mask() const; + + void cuda_error_documentation(); + + bool cuda_error_(CUresult result, const string &stmt); + + void cuda_error_message(const string &message); + + CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_); + + virtual ~CUDADevice(); + + bool support_device(const DeviceRequestedFeatures & /*requested_features*/); + + bool use_adaptive_compilation(); + + bool use_split_kernel(); + + virtual string compile_kernel_get_common_cflags( + const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false); + + string compile_kernel(const DeviceRequestedFeatures &requested_features, + const char *name, + const char *base = "cuda", + bool force_ptx = false); + + virtual bool load_kernels(const DeviceRequestedFeatures &requested_features); + + void load_functions(); + + void reserve_local_memory(const DeviceRequestedFeatures &requested_features); + + void init_host_memory(); + + void load_texture_info(); + + void move_textures_to_host(size_t size, bool for_texture); + + CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0); + + void generic_copy_to(device_memory &mem); + + void generic_free(device_memory &mem); + + void mem_alloc(device_memory &mem); + + void mem_copy_to(device_memory &mem); + 
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem); + + void mem_zero(device_memory &mem); + + void mem_free(device_memory &mem); + + device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/); + + virtual void const_copy_to(const char *name, void *host, size_t size); + + void global_alloc(device_memory &mem); + + void global_free(device_memory &mem); + + void tex_alloc(device_texture &mem); + + void tex_free(device_texture &mem); + + bool denoising_non_local_means(device_ptr image_ptr, + device_ptr guide_ptr, + device_ptr variance_ptr, + device_ptr out_ptr, + DenoisingTask *task); + + bool denoising_construct_transform(DenoisingTask *task); + + bool denoising_accumulate(device_ptr color_ptr, + device_ptr color_variance_ptr, + device_ptr scale_ptr, + int frame, + DenoisingTask *task); + + bool denoising_solve(device_ptr output_ptr, DenoisingTask *task); + + bool denoising_combine_halves(device_ptr a_ptr, + device_ptr b_ptr, + device_ptr mean_ptr, + device_ptr variance_ptr, + int r, + int4 rect, + DenoisingTask *task); + + bool denoising_divide_shadow(device_ptr a_ptr, + device_ptr b_ptr, + device_ptr sample_variance_ptr, + device_ptr sv_variance_ptr, + device_ptr buffer_variance_ptr, + DenoisingTask *task); + + bool denoising_get_feature(int mean_offset, + int variance_offset, + device_ptr mean_ptr, + device_ptr variance_ptr, + float scale, + DenoisingTask *task); + + bool denoising_write_feature(int out_offset, + device_ptr from_ptr, + device_ptr buffer_ptr, + DenoisingTask *task); + + bool denoising_detect_outliers(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr, + DenoisingTask *task); + + void denoise(RenderTile &rtile, DenoisingTask &denoising); + + void adaptive_sampling_filter(uint filter_sample, + WorkTile *wtile, + CUdeviceptr d_wtile, + CUstream stream = 0); + void adaptive_sampling_post(RenderTile &rtile, + WorkTile *wtile, + CUdeviceptr d_wtile, + CUstream stream = 0); 
+ + void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles); + + void film_convert(DeviceTask &task, + device_ptr buffer, + device_ptr rgba_byte, + device_ptr rgba_half); + + void shader(DeviceTask &task); + + CUdeviceptr map_pixels(device_ptr mem); + + void unmap_pixels(device_ptr mem); + + void pixels_alloc(device_memory &mem); + + void pixels_copy_from(device_memory &mem, int y, int w, int h); + + void pixels_free(device_memory &mem); + + void draw_pixels(device_memory &mem, + int y, + int w, + int h, + int width, + int height, + int dx, + int dy, + int dw, + int dh, + bool transparent, + const DeviceDrawParams &draw_params); + + void thread_run(DeviceTask *task); + + virtual void task_add(DeviceTask &task); + + virtual void task_wait(); + + virtual void task_cancel(); +}; + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp new file mode 100644 index 00000000000..0f261ef2f70 --- /dev/null +++ b/intern/cycles/device/cuda/device_cuda_impl.cpp @@ -0,0 +1,2620 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef WITH_CUDA + +# include <climits> +# include <limits.h> +# include <stdio.h> +# include <stdlib.h> +# include <string.h> + +# include "device/cuda/device_cuda.h" +# include "device/device_intern.h" +# include "device/device_split_kernel.h" + +# include "render/buffers.h" + +# include "kernel/filter/filter_defines.h" + +# include "util/util_debug.h" +# include "util/util_foreach.h" +# include "util/util_logging.h" +# include "util/util_map.h" +# include "util/util_md5.h" +# include "util/util_opengl.h" +# include "util/util_path.h" +# include "util/util_string.h" +# include "util/util_system.h" +# include "util/util_time.h" +# include "util/util_types.h" +# include "util/util_windows.h" + +# include "kernel/split/kernel_split_data_types.h" + +CCL_NAMESPACE_BEGIN + +# ifndef WITH_CUDA_DYNLOAD + +/* Transparently implement some functions, so majority of the file does not need + * to worry about difference between dynamically loaded and linked CUDA at all. + */ + +namespace { + +const char *cuewErrorString(CUresult result) +{ + /* We can only give error code here without major code duplication, that + * should be enough since dynamic loading is only being disabled by folks + * who knows what they're doing anyway. + * + * NOTE: Avoid call from several threads. 
+ */ + static string error; + error = string_printf("%d", result); + return error.c_str(); +} + +const char *cuewCompilerPath() +{ + return CYCLES_CUDA_NVCC_EXECUTABLE; +} + +int cuewCompilerVersion() +{ + return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10); +} + +} /* namespace */ +# endif /* WITH_CUDA_DYNLOAD */ + +class CUDADevice; + +class CUDASplitKernel : public DeviceSplitKernel { + CUDADevice *device; + + public: + explicit CUDASplitKernel(CUDADevice *device); + + virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads); + + virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, + RenderTile &rtile, + int num_global_elements, + device_memory &kernel_globals, + device_memory &kernel_data_, + device_memory &split_data, + device_memory &ray_state, + device_memory &queue_index, + device_memory &use_queues_flag, + device_memory &work_pool_wgs); + + virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name, + const DeviceRequestedFeatures &); + virtual int2 split_kernel_local_size(); + virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task); +}; + +/* Utility to push/pop CUDA context. */ +class CUDAContextScope { + public: + CUDAContextScope(CUDADevice *device); + ~CUDAContextScope(); + + private: + CUDADevice *device; +}; + +bool CUDADevice::have_precompiled_kernels() +{ + string cubins_path = path_get("lib"); + return path_exists(cubins_path); +} + +bool CUDADevice::show_samples() const +{ + /* The CUDADevice only processes one tile at a time, so showing samples is fine. 
*/ + return true; +} + +BVHLayoutMask CUDADevice::get_bvh_layout_mask() const +{ + return BVH_LAYOUT_BVH2; +} + +void CUDADevice::cuda_error_documentation() +{ + if (first_error) { + fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n"); + fprintf(stderr, + "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n"); + first_error = false; + } +} + +# define cuda_assert(stmt) \ + { \ + CUresult result = stmt; \ +\ + if (result != CUDA_SUCCESS) { \ + string message = string_printf( \ + "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \ + if (error_msg == "") \ + error_msg = message; \ + fprintf(stderr, "%s\n", message.c_str()); \ + /*cuda_abort();*/ \ + cuda_error_documentation(); \ + } \ + } \ + (void)0 + +bool CUDADevice::cuda_error_(CUresult result, const string &stmt) +{ + if (result == CUDA_SUCCESS) + return false; + + string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result)); + if (error_msg == "") + error_msg = message; + fprintf(stderr, "%s\n", message.c_str()); + cuda_error_documentation(); + return true; +} + +# define cuda_error(stmt) cuda_error_(stmt, # stmt) + +void CUDADevice::cuda_error_message(const string &message) +{ + if (error_msg == "") + error_msg = message; + fprintf(stderr, "%s\n", message.c_str()); + cuda_error_documentation(); +} + +CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) + : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL) +{ + first_error = true; + background = background_; + + cuDevId = info.num; + cuDevice = 0; + cuContext = 0; + + cuModule = 0; + cuFilterModule = 0; + + split_kernel = NULL; + + need_texture_info = false; + + device_texture_headroom = 0; + device_working_headroom = 0; + move_texture_to_host = false; + map_host_limit = 0; + map_host_used = 0; + can_map_host = 0; + + functions.loaded = false; + + /* 
Initialize CUDA. */ + if (cuda_error(cuInit(0))) + return; + + /* Setup device and context. */ + if (cuda_error(cuDeviceGet(&cuDevice, cuDevId))) + return; + + /* CU_CTX_MAP_HOST for mapping host memory when out of device memory. + * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, + * so we can predict which memory to map to host. */ + cuda_assert( + cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice)); + + unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX; + if (can_map_host) { + ctx_flags |= CU_CTX_MAP_HOST; + init_host_memory(); + } + + /* Create context. */ + CUresult result; + + if (background) { + result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); + } + else { + result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice); + + if (result != CUDA_SUCCESS) { + result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); + background = true; + } + } + + if (cuda_error_(result, "cuCtxCreate")) + return; + + int major, minor; + cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); + cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); + cuDevArchitecture = major * 100 + minor * 10; + + /* Pop context set by cuCtxCreate. 
*/ + cuCtxPopCurrent(NULL); +} + +CUDADevice::~CUDADevice() +{ + task_pool.stop(); + + delete split_kernel; + + texture_info.free(); + + cuda_assert(cuCtxDestroy(cuContext)); +} + +bool CUDADevice::support_device(const DeviceRequestedFeatures & /*requested_features*/) +{ + int major, minor; + cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); + cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); + + /* We only support sm_30 and above */ + if (major < 3) { + cuda_error_message( + string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", + major, + minor)); + return false; + } + + return true; +} + +bool CUDADevice::use_adaptive_compilation() +{ + return DebugFlags().cuda.adaptive_compile; +} + +bool CUDADevice::use_split_kernel() +{ + return DebugFlags().cuda.split_kernel; +} + +/* Common NVCC flags which stay the same regardless of shading model, + * kernel sources md5 and only depend on compiler or compilation settings. 
+ */ +string CUDADevice::compile_kernel_get_common_cflags( + const DeviceRequestedFeatures &requested_features, bool filter, bool split) +{ + const int machine = system_cpu_bits(); + const string source_path = path_get("source"); + const string include_path = source_path; + string cflags = string_printf( + "-m%d " + "--ptxas-options=\"-v\" " + "--use_fast_math " + "-DNVCC " + "-I\"%s\"", + machine, + include_path.c_str()); + if (!filter && use_adaptive_compilation()) { + cflags += " " + requested_features.get_build_options(); + } + const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); + if (extra_cflags) { + cflags += string(" ") + string(extra_cflags); + } +# ifdef WITH_CYCLES_DEBUG + cflags += " -D__KERNEL_DEBUG__"; +# endif + + if (split) { + cflags += " -D__SPLIT__"; + } + + return cflags; +} + +string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features, + const char *name, + const char *base, + bool force_ptx) +{ + /* Compute kernel name. */ + int major, minor; + cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); + cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); + + /* Attempt to use kernel provided with Blender. */ + if (!use_adaptive_compilation()) { + if (!force_ptx) { + const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; + if (path_exists(cubin)) { + VLOG(1) << "Using precompiled kernel."; + return cubin; + } + } + + const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } + } + + /* Try to use locally compiled kernel. 
*/ + string source_path = path_get("source"); + const string source_md5 = path_files_md5_hash(source_path); + + /* We include cflags into md5 so changing cuda toolkit or changing other + * compiler command line arguments makes sure cubin gets re-built. + */ + string common_cflags = compile_kernel_get_common_cflags( + requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL); + const string kernel_md5 = util_md5_string(source_md5 + common_cflags); + + const char *const kernel_ext = force_ptx ? "ptx" : "cubin"; + const char *const kernel_arch = force_ptx ? "compute" : "sm"; + const string cubin_file = string_printf( + "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext); + const string cubin = path_cache_get(path_join("kernels", cubin_file)); + VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; + if (path_exists(cubin)) { + VLOG(1) << "Using locally compiled kernel."; + return cubin; + } + +# ifdef _WIN32 + if (!use_adaptive_compilation() && have_precompiled_kernels()) { + if (major < 3) { + cuda_error_message( + string_printf("CUDA device requires compute capability 3.0 or up, " + "found %d.%d. Your GPU is not supported.", + major, + minor)); + } + else { + cuda_error_message( + string_printf("CUDA binary kernel for this graphics card compute " + "capability (%d.%d) not found.", + major, + minor)); + } + return string(); + } +# endif + + /* Compile. */ + const char *const nvcc = cuewCompilerPath(); + if (nvcc == NULL) { + cuda_error_message( + "CUDA nvcc compiler not found. 
" + "Install CUDA toolkit in default location."); + return string(); + } + + const int nvcc_cuda_version = cuewCompilerVersion(); + VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << "."; + if (nvcc_cuda_version < 80) { + printf( + "Unsupported CUDA version %d.%d detected, " + "you need CUDA 8.0 or newer.\n", + nvcc_cuda_version / 10, + nvcc_cuda_version % 10); + return string(); + } + else if (nvcc_cuda_version != 101) { + printf( + "CUDA version %d.%d detected, build may succeed but only " + "CUDA 10.1 is officially supported.\n", + nvcc_cuda_version / 10, + nvcc_cuda_version % 10); + } + + double starttime = time_dt(); + + path_create_directories(cubin); + + source_path = path_join(path_join(source_path, "kernel"), + path_join("kernels", path_join(base, string_printf("%s.cu", name)))); + + string command = string_printf( + "\"%s\" " + "-arch=%s_%d%d " + "--%s \"%s\" " + "-o \"%s\" " + "%s", + nvcc, + kernel_arch, + major, + minor, + kernel_ext, + source_path.c_str(), + cubin.c_str(), + common_cflags.c_str()); + + printf("Compiling CUDA kernel ...\n%s\n", command.c_str()); + +# ifdef _WIN32 + command = "call " + command; +# endif + if (system(command.c_str()) != 0) { + cuda_error_message( + "Failed to execute compilation command, " + "see console for details."); + return string(); + } + + /* Verify if compilation succeeded */ + if (!path_exists(cubin)) { + cuda_error_message( + "CUDA kernel compilation failed, " + "see console for details."); + return string(); + } + + printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); + + return cubin; +} + +bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features) +{ + /* TODO(sergey): Support kernels re-load for CUDA devices. + * + * Currently re-loading kernel will invalidate memory pointers, + * causing problems in cuCtxSynchronize. 
+ */ + if (cuFilterModule && cuModule) { + VLOG(1) << "Skipping kernel reload, not currently supported."; + return true; + } + + /* check if cuda init succeeded */ + if (cuContext == 0) + return false; + + /* check if GPU is supported */ + if (!support_device(requested_features)) + return false; + + /* get kernel */ + const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel"; + string cubin = compile_kernel(requested_features, kernel_name); + if (cubin.empty()) + return false; + + const char *filter_name = "filter"; + string filter_cubin = compile_kernel(requested_features, filter_name); + if (filter_cubin.empty()) + return false; + + /* open module */ + CUDAContextScope scope(this); + + string cubin_data; + CUresult result; + + if (path_read_text(cubin, cubin_data)) + result = cuModuleLoadData(&cuModule, cubin_data.c_str()); + else + result = CUDA_ERROR_FILE_NOT_FOUND; + + if (cuda_error_(result, "cuModuleLoad")) + cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str())); + + if (path_read_text(filter_cubin, cubin_data)) + result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str()); + else + result = CUDA_ERROR_FILE_NOT_FOUND; + + if (cuda_error_(result, "cuModuleLoad")) + cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str())); + + if (result == CUDA_SUCCESS) { + reserve_local_memory(requested_features); + } + + load_functions(); + + return (result == CUDA_SUCCESS); +} + +void CUDADevice::load_functions() +{ + /* TODO: load all functions here. 
*/ + if (functions.loaded) { + return; + } + functions.loaded = true; + + cuda_assert(cuModuleGetFunction( + &functions.adaptive_stopping, cuModule, "kernel_cuda_adaptive_stopping")); + cuda_assert(cuModuleGetFunction( + &functions.adaptive_filter_x, cuModule, "kernel_cuda_adaptive_filter_x")); + cuda_assert(cuModuleGetFunction( + &functions.adaptive_filter_y, cuModule, "kernel_cuda_adaptive_filter_y")); + cuda_assert(cuModuleGetFunction( + &functions.adaptive_scale_samples, cuModule, "kernel_cuda_adaptive_scale_samples")); + + cuda_assert(cuFuncSetCacheConfig(functions.adaptive_stopping, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_x, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_y, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(functions.adaptive_scale_samples, CU_FUNC_CACHE_PREFER_L1)); + + int unused_min_blocks; + cuda_assert(cuOccupancyMaxPotentialBlockSize(&unused_min_blocks, + &functions.adaptive_num_threads_per_block, + functions.adaptive_scale_samples, + NULL, + 0, + 0)); +} + +void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_features) +{ + if (use_split_kernel()) { + /* Split kernel mostly uses global memory and adaptive compilation, + * difficult to predict how much is needed currently. */ + return; + } + + /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory + * needed for kernel launches, so that we can reliably figure out when + * to allocate scene data in mapped host memory. */ + CUDAContextScope scope(this); + + size_t total = 0, free_before = 0, free_after = 0; + cuMemGetInfo(&free_before, &total); + + /* Get kernel function. 
*/ + CUfunction cuPathTrace; + + if (requested_features.use_integrator_branched) { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); + } + else { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); + } + + cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); + + int min_blocks, num_threads_per_block; + cuda_assert(cuOccupancyMaxPotentialBlockSize( + &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); + + /* Launch kernel, using just 1 block appears sufficient to reserve + * memory for all multiprocessors. It would be good to do this in + * parallel for the multi GPU case still to make it faster. */ + CUdeviceptr d_work_tiles = 0; + uint total_work_size = 0; + + void *args[] = {&d_work_tiles, &total_work_size}; + + cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); + + cuda_assert(cuCtxSynchronize()); + + cuMemGetInfo(&free_after, &total); + VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after) + << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; + +# if 0 + /* For testing mapped host memory, fill up device memory. */ + const size_t keep_mb = 1024; + + while (free_after > keep_mb * 1024 * 1024LL) { + CUdeviceptr tmp; + cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL)); + cuMemGetInfo(&free_after, &total); + } +# endif +} + +void CUDADevice::init_host_memory() +{ + /* Limit amount of host mapped memory, because allocating too much can + * cause system instability. Leave at least half or 4 GB of system + * memory free, whichever is smaller. 
*/ + size_t default_limit = 4 * 1024 * 1024 * 1024LL; + size_t system_ram = system_physical_ram(); + + if (system_ram > 0) { + if (system_ram / 2 > default_limit) { + map_host_limit = system_ram - default_limit; + } + else { + map_host_limit = system_ram / 2; + } + } + else { + VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; + map_host_limit = 0; + } + + /* Amount of device memory to keep is free after texture memory + * and working memory allocations respectively. We set the working + * memory limit headroom lower so that some space is left after all + * texture memory allocations. */ + device_working_headroom = 32 * 1024 * 1024LL; // 32MB + device_texture_headroom = 128 * 1024 * 1024LL; // 128MB + + VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit) + << " bytes. (" << string_human_readable_size(map_host_limit) << ")"; +} + +void CUDADevice::load_texture_info() +{ + if (need_texture_info) { + texture_info.copy_to_device(); + need_texture_info = false; + } +} + +void CUDADevice::move_textures_to_host(size_t size, bool for_texture) +{ + /* Signal to reallocate textures in host memory only. */ + move_texture_to_host = true; + + while (size > 0) { + /* Find suitable memory allocation to move. */ + device_memory *max_mem = NULL; + size_t max_size = 0; + bool max_is_image = false; + + foreach (CUDAMemMap::value_type &pair, cuda_mem_map) { + device_memory &mem = *pair.first; + CUDAMem *cmem = &pair.second; + + bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && + (&mem != &texture_info); + bool is_image = is_texture && (mem.data_height > 1); + + /* Can't move this type of memory. */ + if (!is_texture || cmem->array) { + continue; + } + + /* Already in host memory. */ + if (cmem->use_mapped_host) { + continue; + } + + /* For other textures, only move image textures. */ + if (for_texture && !is_image) { + continue; + } + + /* Try to move largest allocation, prefer moving images. 
*/ + if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) { + max_is_image = is_image; + max_size = mem.device_size; + max_mem = &mem; + } + } + + /* Move to host memory. This part is mutex protected since + * multiple CUDA devices could be moving the memory. The + * first one will do it, and the rest will adopt the pointer. */ + if (max_mem) { + VLOG(1) << "Move memory from device to host: " << max_mem->name; + + static thread_mutex move_mutex; + thread_scoped_lock lock(move_mutex); + + /* Preserve the original device pointer, in case of multi device + * we can't change it because the pointer mapping would break. */ + device_ptr prev_pointer = max_mem->device_pointer; + size_t prev_size = max_mem->device_size; + + mem_copy_to(*max_mem); + size = (max_size >= size) ? 0 : size - max_size; + + max_mem->device_pointer = prev_pointer; + max_mem->device_size = prev_size; + } + else { + break; + } + } + + /* Update texture info array with new pointers. */ + load_texture_info(); + + move_texture_to_host = false; +} + +CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding) +{ + CUDAContextScope scope(this); + + CUdeviceptr device_pointer = 0; + size_t size = mem.memory_size() + pitch_padding; + + CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY; + const char *status = ""; + + /* First try allocating in device memory, respecting headroom. We make + * an exception for texture info. It is small and frequently accessed, + * so treat it as working memory. + * + * If there is not enough room for working memory, we will try to move + * textures to host memory, assuming the performance impact would have + * been worse for working memory. */ + bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info); + bool is_image = is_texture && (mem.data_height > 1); + + size_t headroom = (is_texture) ? 
device_texture_headroom : device_working_headroom; + + size_t total = 0, free = 0; + cuMemGetInfo(&free, &total); + + /* Move textures to host memory if needed. */ + if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) { + move_textures_to_host(size + headroom - free, is_texture); + cuMemGetInfo(&free, &total); + } + + /* Allocate in device memory. */ + if (!move_texture_to_host && (size + headroom) < free) { + mem_alloc_result = cuMemAlloc(&device_pointer, size); + if (mem_alloc_result == CUDA_SUCCESS) { + status = " in device memory"; + } + } + + /* Fall back to mapped host memory if needed and possible. */ + + void *shared_pointer = 0; + + if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { + if (mem.shared_pointer) { + /* Another device already allocated host memory. */ + mem_alloc_result = CUDA_SUCCESS; + shared_pointer = mem.shared_pointer; + } + else if (map_host_used + size < map_host_limit) { + /* Allocate host memory ourselves. */ + mem_alloc_result = cuMemHostAlloc( + &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED); + + assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) || + (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0)); + } + + if (mem_alloc_result == CUDA_SUCCESS) { + cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0)); + map_host_used += size; + status = " in host memory"; + } + else { + status = " failed, out of host memory"; + } + } + + if (mem_alloc_result != CUDA_SUCCESS) { + status = " failed, out of device and host memory"; + cuda_assert(mem_alloc_result); + } + + if (mem.name) { + VLOG(1) << "Buffer allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. 
(" + << string_human_readable_size(mem.memory_size()) << ")" << status; + } + + mem.device_pointer = (device_ptr)device_pointer; + mem.device_size = size; + stats.mem_alloc(size); + + if (!mem.device_pointer) { + return NULL; + } + + /* Insert into map of allocations. */ + CUDAMem *cmem = &cuda_mem_map[&mem]; + if (shared_pointer != 0) { + /* Replace host pointer with our host allocation. Only works if + * CUDA memory layout is the same and has no pitch padding. Also + * does not work if we move textures to host during a render, + * since other devices might be using the memory. */ + + if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer && + mem.host_pointer != shared_pointer) { + memcpy(shared_pointer, mem.host_pointer, size); + + /* A Call to device_memory::host_free() should be preceded by + * a call to device_memory::device_free() for host memory + * allocated by a device to be handled properly. Two exceptions + * are here and a call in OptiXDevice::generic_alloc(), where + * the current host memory can be assumed to be allocated by + * device_memory::host_alloc(), not by a device */ + + mem.host_free(); + mem.host_pointer = shared_pointer; + } + mem.shared_pointer = shared_pointer; + mem.shared_counter++; + cmem->use_mapped_host = true; + } + else { + cmem->use_mapped_host = false; + } + + return cmem; +} + +void CUDADevice::generic_copy_to(device_memory &mem) +{ + if (!mem.host_pointer || !mem.device_pointer) { + return; + } + + /* If use_mapped_host of mem is false, the current device only uses device memory allocated by + * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from + * mem.host_pointer. 
*/ + if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { + const CUDAContextScope scope(this); + cuda_assert( + cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size())); + } +} + +void CUDADevice::generic_free(device_memory &mem) +{ + if (mem.device_pointer) { + CUDAContextScope scope(this); + const CUDAMem &cmem = cuda_mem_map[&mem]; + + /* If cmem.use_mapped_host is true, reference counting is used + * to safely free a mapped host memory. */ + + if (cmem.use_mapped_host) { + assert(mem.shared_pointer); + if (mem.shared_pointer) { + assert(mem.shared_counter > 0); + if (--mem.shared_counter == 0) { + if (mem.host_pointer == mem.shared_pointer) { + mem.host_pointer = 0; + } + cuMemFreeHost(mem.shared_pointer); + mem.shared_pointer = 0; + } + } + map_host_used -= mem.device_size; + } + else { + /* Free device memory. */ + cuMemFree(mem.device_pointer); + } + + stats.mem_free(mem.device_size); + mem.device_pointer = 0; + mem.device_size = 0; + + cuda_mem_map.erase(cuda_mem_map.find(&mem)); + } +} + +void CUDADevice::mem_alloc(device_memory &mem) +{ + if (mem.type == MEM_PIXELS && !background) { + pixels_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); + } + else if (mem.type == MEM_GLOBAL) { + assert(!"mem_alloc not supported for global memory."); + } + else { + generic_alloc(mem); + } +} + +void CUDADevice::mem_copy_to(device_memory &mem) +{ + if (mem.type == MEM_PIXELS) { + assert(!"mem_copy_to not supported for pixels."); + } + else if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); + } + else { + if (!mem.device_pointer) { + generic_alloc(mem); + } + + generic_copy_to(mem); + } +} + +void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem) +{ + if (mem.type == MEM_PIXELS && !background) { + 
pixels_copy_from(mem, y, w, h); + } + else if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) { + assert(!"mem_copy_from not supported for textures."); + } + else if (mem.host_pointer) { + const size_t size = elem * w * h; + const size_t offset = elem * y * w; + + if (mem.device_pointer) { + const CUDAContextScope scope(this); + cuda_assert(cuMemcpyDtoH( + (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size)); + } + else { + memset((char *)mem.host_pointer + offset, 0, size); + } + } +} + +void CUDADevice::mem_zero(device_memory &mem) +{ + if (!mem.device_pointer) { + mem_alloc(mem); + } + if (!mem.device_pointer) { + return; + } + + /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory + * regardless of mem.host_pointer and mem.shared_pointer. */ + if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) { + const CUDAContextScope scope(this); + cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); + } + else if (mem.host_pointer) { + memset(mem.host_pointer, 0, mem.memory_size()); + } +} + +void CUDADevice::mem_free(device_memory &mem) +{ + if (mem.type == MEM_PIXELS && !background) { + pixels_free(mem); + } + else if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + } + else { + generic_free(mem); + } +} + +device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) +{ + return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset)); +} + +void CUDADevice::const_copy_to(const char *name, void *host, size_t size) +{ + CUDAContextScope scope(this); + CUdeviceptr mem; + size_t bytes; + + cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); + // assert(bytes == size); + cuda_assert(cuMemcpyHtoD(mem, host, size)); +} + +void CUDADevice::global_alloc(device_memory &mem) +{ + CUDAContextScope scope(this); + + 
generic_alloc(mem); + generic_copy_to(mem); + + const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer)); +} + +void CUDADevice::global_free(device_memory &mem) +{ + if (mem.device_pointer) { + CUDAContextScope scope(this); + generic_free(mem); + } +} + +void CUDADevice::tex_alloc(device_texture &mem) +{ + CUDAContextScope scope(this); + + /* General variables for both architectures */ + string bind_name = mem.name; + size_t dsize = datatype_size(mem.data_type); + size_t size = mem.memory_size(); + + CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; + switch (mem.info.extension) { + case EXTENSION_REPEAT: + address_mode = CU_TR_ADDRESS_MODE_WRAP; + break; + case EXTENSION_EXTEND: + address_mode = CU_TR_ADDRESS_MODE_CLAMP; + break; + case EXTENSION_CLIP: + address_mode = CU_TR_ADDRESS_MODE_BORDER; + break; + default: + assert(0); + break; + } + + CUfilter_mode filter_mode; + if (mem.info.interpolation == INTERPOLATION_CLOSEST) { + filter_mode = CU_TR_FILTER_MODE_POINT; + } + else { + filter_mode = CU_TR_FILTER_MODE_LINEAR; + } + + /* Image Texture Storage */ + CUarray_format_enum format; + switch (mem.data_type) { + case TYPE_UCHAR: + format = CU_AD_FORMAT_UNSIGNED_INT8; + break; + case TYPE_UINT16: + format = CU_AD_FORMAT_UNSIGNED_INT16; + break; + case TYPE_UINT: + format = CU_AD_FORMAT_UNSIGNED_INT32; + break; + case TYPE_INT: + format = CU_AD_FORMAT_SIGNED_INT32; + break; + case TYPE_FLOAT: + format = CU_AD_FORMAT_FLOAT; + break; + case TYPE_HALF: + format = CU_AD_FORMAT_HALF; + break; + default: + assert(0); + return; + } + + CUDAMem *cmem = NULL; + CUarray array_3d = NULL; + size_t src_pitch = mem.data_width * dsize * mem.data_elements; + size_t dst_pitch = src_pitch; + + if (mem.data_depth > 1) { + /* 3D texture using array, there is no API for linear memory. 
*/ + CUDA_ARRAY3D_DESCRIPTOR desc; + + desc.Width = mem.data_width; + desc.Height = mem.data_height; + desc.Depth = mem.data_depth; + desc.Format = format; + desc.NumChannels = mem.data_elements; + desc.Flags = 0; + + VLOG(1) << "Array 3D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + + cuda_assert(cuArray3DCreate(&array_3d, &desc)); + + if (!array_3d) { + return; + } + + CUDA_MEMCPY3D param; + memset(¶m, 0, sizeof(param)); + param.dstMemoryType = CU_MEMORYTYPE_ARRAY; + param.dstArray = array_3d; + param.srcMemoryType = CU_MEMORYTYPE_HOST; + param.srcHost = mem.host_pointer; + param.srcPitch = src_pitch; + param.WidthInBytes = param.srcPitch; + param.Height = mem.data_height; + param.Depth = mem.data_depth; + + cuda_assert(cuMemcpy3D(¶m)); + + mem.device_pointer = (device_ptr)array_3d; + mem.device_size = size; + stats.mem_alloc(size); + + cmem = &cuda_mem_map[&mem]; + cmem->texobject = 0; + cmem->array = array_3d; + } + else if (mem.data_height > 0) { + /* 2D texture, using pitch aligned linear memory. */ + int alignment = 0; + cuda_assert( + cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice)); + dst_pitch = align_up(src_pitch, alignment); + size_t dst_size = dst_pitch * mem.data_height; + + cmem = generic_alloc(mem, dst_size - mem.memory_size()); + if (!cmem) { + return; + } + + CUDA_MEMCPY2D param; + memset(¶m, 0, sizeof(param)); + param.dstMemoryType = CU_MEMORYTYPE_DEVICE; + param.dstDevice = mem.device_pointer; + param.dstPitch = dst_pitch; + param.srcMemoryType = CU_MEMORYTYPE_HOST; + param.srcHost = mem.host_pointer; + param.srcPitch = src_pitch; + param.WidthInBytes = param.srcPitch; + param.Height = mem.data_height; + + cuda_assert(cuMemcpy2DUnaligned(¶m)); + } + else { + /* 1D texture, using linear memory. 
*/ + cmem = generic_alloc(mem); + if (!cmem) { + return; + } + + cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); + } + + /* Kepler+, bindless textures. */ + CUDA_RESOURCE_DESC resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + + if (array_3d) { + resDesc.resType = CU_RESOURCE_TYPE_ARRAY; + resDesc.res.array.hArray = array_3d; + resDesc.flags = 0; + } + else if (mem.data_height > 0) { + resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; + resDesc.res.pitch2D.devPtr = mem.device_pointer; + resDesc.res.pitch2D.format = format; + resDesc.res.pitch2D.numChannels = mem.data_elements; + resDesc.res.pitch2D.height = mem.data_height; + resDesc.res.pitch2D.width = mem.data_width; + resDesc.res.pitch2D.pitchInBytes = dst_pitch; + } + else { + resDesc.resType = CU_RESOURCE_TYPE_LINEAR; + resDesc.res.linear.devPtr = mem.device_pointer; + resDesc.res.linear.format = format; + resDesc.res.linear.numChannels = mem.data_elements; + resDesc.res.linear.sizeInBytes = mem.device_size; + } + + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = address_mode; + texDesc.addressMode[1] = address_mode; + texDesc.addressMode[2] = address_mode; + texDesc.filterMode = filter_mode; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); + + /* Resize once */ + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. */ + texture_info.resize(slot + 128); + } + + /* Set Mapping and tag that we need to (re-)upload to device */ + texture_info[slot] = mem.info; + texture_info[slot].data = (uint64_t)cmem->texobject; + need_texture_info = true; +} + +void CUDADevice::tex_free(device_texture &mem) +{ + if (mem.device_pointer) { + CUDAContextScope scope(this); + const CUDAMem &cmem = cuda_mem_map[&mem]; + + if (cmem.texobject) { + /* Free bindless texture. 
*/ + cuTexObjectDestroy(cmem.texobject); + } + + if (cmem.array) { + /* Free array. */ + cuArrayDestroy(cmem.array); + stats.mem_free(mem.device_size); + mem.device_pointer = 0; + mem.device_size = 0; + + cuda_mem_map.erase(cuda_mem_map.find(&mem)); + } + else { + generic_free(mem); + } + } +} + +# define CUDA_GET_BLOCKSIZE(func, w, h) \ + int threads_per_block; \ + cuda_assert( \ + cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ + int threads = (int)sqrt((float)threads_per_block); \ + int xblocks = ((w) + threads - 1) / threads; \ + int yblocks = ((h) + threads - 1) / threads; + +# define CUDA_LAUNCH_KERNEL(func, args) \ + cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); + +/* Similar as above, but for 1-dimensional blocks. */ +# define CUDA_GET_BLOCKSIZE_1D(func, w, h) \ + int threads_per_block; \ + cuda_assert( \ + cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ + int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \ + int yblocks = h; + +# define CUDA_LAUNCH_KERNEL_1D(func, args) \ + cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0)); + +bool CUDADevice::denoising_non_local_means(device_ptr image_ptr, + device_ptr guide_ptr, + device_ptr variance_ptr, + device_ptr out_ptr, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + int stride = task->buffer.stride; + int w = task->buffer.width; + int h = task->buffer.h; + int r = task->nlm_state.r; + int f = task->nlm_state.f; + float a = task->nlm_state.a; + float k_2 = task->nlm_state.k_2; + + int pass_stride = task->buffer.pass_stride; + int num_shifts = (2 * r + 1) * (2 * r + 1); + int channel_offset = task->nlm_state.is_color ? 
task->buffer.pass_stride : 0; + int frame_offset = 0; + + if (have_error()) + return false; + + CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer; + CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; + CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts; + CUdeviceptr scale_ptr = 0; + + cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride)); + cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride)); + + { + CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput; + cuda_assert(cuModuleGetFunction( + &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); + cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); + cuda_assert(cuModuleGetFunction( + &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight")); + cuda_assert(cuModuleGetFunction( + &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output")); + + cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1)); + + CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts); + + void *calc_difference_args[] = {&guide_ptr, + &variance_ptr, + &scale_ptr, + &difference, + &w, + &h, + &stride, + &pass_stride, + &r, + &channel_offset, + &frame_offset, + &a, + &k_2}; + void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; + void *calc_weight_args[] = { + &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; + void *update_output_args[] = {&blurDifference, + &image_ptr, + &out_ptr, + &weightAccum, + &w, + &h, + &stride, + &pass_stride, + &channel_offset, + &r, + &f}; + + 
CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); + } + + { + CUfunction cuNLMNormalize; + cuda_assert( + cuModuleGetFunction(&cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize")); + cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1)); + void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride}; + CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h); + CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args); + cuda_assert(cuCtxSynchronize()); + } + + return !have_error(); +} + +bool CUDADevice::denoising_construct_transform(DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + CUfunction cuFilterConstructTransform; + cuda_assert(cuModuleGetFunction( + &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform")); + cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED)); + CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h); + + void *args[] = {&task->buffer.mem.device_pointer, + &task->tile_info_mem.device_pointer, + &task->storage.transform.device_pointer, + &task->storage.rank.device_pointer, + &task->filter_area, + &task->rect, + &task->radius, + &task->pca_threshold, + &task->buffer.pass_stride, + &task->buffer.frame_stride, + &task->buffer.use_time}; + CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_accumulate(device_ptr color_ptr, + device_ptr color_variance_ptr, + device_ptr scale_ptr, + int frame, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + int r = task->radius; + int f = 4; + float a = 1.0f; + float k_2 = 
task->nlm_k_2; + + int w = task->reconstruction_state.source_w; + int h = task->reconstruction_state.source_h; + int stride = task->buffer.stride; + int frame_offset = frame * task->buffer.frame_stride; + int t = task->tile_info->frames[frame]; + + int pass_stride = task->buffer.pass_stride; + int num_shifts = (2 * r + 1) * (2 * r + 1); + + if (have_error()) + return false; + + CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer; + CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; + + CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; + cuda_assert(cuModuleGetFunction( + &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); + cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); + cuda_assert( + cuModuleGetFunction(&cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight")); + cuda_assert(cuModuleGetFunction( + &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian")); + + cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED)); + + CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, + task->reconstruction_state.source_w * task->reconstruction_state.source_h, + num_shifts); + + void *calc_difference_args[] = {&color_ptr, + &color_variance_ptr, + &scale_ptr, + &difference, + &w, + &h, + &stride, + &pass_stride, + &r, + &pass_stride, + &frame_offset, + &a, + &k_2}; + void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; + void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; + void *construct_gramian_args[] = {&t, + &blurDifference, + 
&task->buffer.mem.device_pointer, + &task->storage.transform.device_pointer, + &task->storage.rank.device_pointer, + &task->storage.XtWX.device_pointer, + &task->storage.XtWY.device_pointer, + &task->reconstruction_state.filter_window, + &w, + &h, + &stride, + &pass_stride, + &r, + &f, + &frame_offset, + &task->buffer.use_time}; + + CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); + CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_solve(device_ptr output_ptr, DenoisingTask *task) +{ + CUfunction cuFinalize; + cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize")); + cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1)); + void *finalize_args[] = {&output_ptr, + &task->storage.rank.device_pointer, + &task->storage.XtWX.device_pointer, + &task->storage.XtWY.device_pointer, + &task->filter_area, + &task->reconstruction_state.buffer_params.x, + &task->render_buffer.samples}; + CUDA_GET_BLOCKSIZE( + cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h); + CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_combine_halves(device_ptr a_ptr, + device_ptr b_ptr, + device_ptr mean_ptr, + device_ptr variance_ptr, + int r, + int4 rect, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + CUfunction cuFilterCombineHalves; + cuda_assert(cuModuleGetFunction( + &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves")); + cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1)); + CUDA_GET_BLOCKSIZE( + cuFilterCombineHalves, task->rect.z 
- task->rect.x, task->rect.w - task->rect.y); + + void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r}; + CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_divide_shadow(device_ptr a_ptr, + device_ptr b_ptr, + device_ptr sample_variance_ptr, + device_ptr sv_variance_ptr, + device_ptr buffer_variance_ptr, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + CUfunction cuFilterDivideShadow; + cuda_assert(cuModuleGetFunction( + &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow")); + cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1)); + CUDA_GET_BLOCKSIZE( + cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y); + + void *args[] = {&task->render_buffer.samples, + &task->tile_info_mem.device_pointer, + &a_ptr, + &b_ptr, + &sample_variance_ptr, + &sv_variance_ptr, + &buffer_variance_ptr, + &task->rect, + &task->render_buffer.pass_stride, + &task->render_buffer.offset}; + CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_get_feature(int mean_offset, + int variance_offset, + device_ptr mean_ptr, + device_ptr variance_ptr, + float scale, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + CUfunction cuFilterGetFeature; + cuda_assert( + cuModuleGetFunction(&cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature")); + cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1)); + CUDA_GET_BLOCKSIZE(cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y); + + void *args[] = {&task->render_buffer.samples, + &task->tile_info_mem.device_pointer, + &mean_offset, + &variance_offset, + &mean_ptr, + &variance_ptr, + &scale, + &task->rect, + 
&task->render_buffer.pass_stride, + &task->render_buffer.offset}; + CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_write_feature(int out_offset, + device_ptr from_ptr, + device_ptr buffer_ptr, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + CUfunction cuFilterWriteFeature; + cuda_assert(cuModuleGetFunction( + &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature")); + cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1)); + CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w); + + void *args[] = {&task->render_buffer.samples, + &task->reconstruction_state.buffer_params, + &task->filter_area, + &from_ptr, + &buffer_ptr, + &out_offset, + &task->rect}; + CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +bool CUDADevice::denoising_detect_outliers(device_ptr image_ptr, + device_ptr variance_ptr, + device_ptr depth_ptr, + device_ptr output_ptr, + DenoisingTask *task) +{ + if (have_error()) + return false; + + CUDAContextScope scope(this); + + CUfunction cuFilterDetectOutliers; + cuda_assert(cuModuleGetFunction( + &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers")); + cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1)); + CUDA_GET_BLOCKSIZE( + cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y); + + void *args[] = { + &image_ptr, &variance_ptr, &depth_ptr, &output_ptr, &task->rect, &task->buffer.pass_stride}; + + CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args); + cuda_assert(cuCtxSynchronize()); + + return !have_error(); +} + +void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising) +{ + denoising.functions.construct_transform = function_bind( + &CUDADevice::denoising_construct_transform, 
this, &denoising); + denoising.functions.accumulate = function_bind( + &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); + denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising); + denoising.functions.divide_shadow = function_bind( + &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.non_local_means = function_bind( + &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); + denoising.functions.combine_halves = function_bind( + &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); + denoising.functions.get_feature = function_bind( + &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising); + denoising.functions.write_feature = function_bind( + &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising); + denoising.functions.detect_outliers = function_bind( + &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); + + denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); + denoising.render_buffer.samples = rtile.sample; + denoising.buffer.gpu_temporary_mem = true; + + denoising.run_denoising(&rtile); +} + +void CUDADevice::adaptive_sampling_filter(uint filter_sample, + WorkTile *wtile, + CUdeviceptr d_wtile, + CUstream stream) +{ + const int num_threads_per_block = functions.adaptive_num_threads_per_block; + + /* These are a series of tiny kernels because there is no grid synchronization + * from within a kernel, so multiple kernel launches it is. 
*/ + uint total_work_size = wtile->h * wtile->w; + void *args2[] = {&d_wtile, &filter_sample, &total_work_size}; + uint num_blocks = divide_up(total_work_size, num_threads_per_block); + cuda_assert(cuLaunchKernel(functions.adaptive_stopping, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + 0, + stream, + args2, + 0)); + total_work_size = wtile->h; + num_blocks = divide_up(total_work_size, num_threads_per_block); + cuda_assert(cuLaunchKernel(functions.adaptive_filter_x, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + 0, + stream, + args2, + 0)); + total_work_size = wtile->w; + num_blocks = divide_up(total_work_size, num_threads_per_block); + cuda_assert(cuLaunchKernel(functions.adaptive_filter_y, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + 0, + stream, + args2, + 0)); +} + +void CUDADevice::adaptive_sampling_post(RenderTile &rtile, + WorkTile *wtile, + CUdeviceptr d_wtile, + CUstream stream) +{ + const int num_threads_per_block = functions.adaptive_num_threads_per_block; + uint total_work_size = wtile->h * wtile->w; + + void *args[] = {&d_wtile, &rtile.start_sample, &rtile.sample, &total_work_size}; + uint num_blocks = divide_up(total_work_size, num_threads_per_block); + cuda_assert(cuLaunchKernel(functions.adaptive_scale_samples, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + 0, + stream, + args, + 0)); +} + +void CUDADevice::path_trace(DeviceTask &task, + RenderTile &rtile, + device_vector<WorkTile> &work_tiles) +{ + scoped_timer timer(&rtile.buffers->render_time); + + if (have_error()) + return; + + CUDAContextScope scope(this); + CUfunction cuPathTrace; + + /* Get kernel function. 
*/ + if (task.integrator_branched) { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); + } + else { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); + } + + if (have_error()) { + return; + } + + cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); + + /* Allocate work tile. */ + work_tiles.alloc(1); + + WorkTile *wtile = work_tiles.data(); + wtile->x = rtile.x; + wtile->y = rtile.y; + wtile->w = rtile.w; + wtile->h = rtile.h; + wtile->offset = rtile.offset; + wtile->stride = rtile.stride; + wtile->buffer = (float *)(CUdeviceptr)rtile.buffer; + + /* Prepare work size. More step samples render faster, but for now we + * remain conservative for GPUs connected to a display to avoid driver + * timeouts and display freezing. */ + int min_blocks, num_threads_per_block; + cuda_assert(cuOccupancyMaxPotentialBlockSize( + &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); + if (!info.display_device) { + min_blocks *= 8; + } + + uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h); + if (task.adaptive_sampling.use) { + step_samples = task.adaptive_sampling.align_static_samples(step_samples); + } + + /* Render all samples. */ + int start_sample = rtile.start_sample; + int end_sample = rtile.start_sample + rtile.num_samples; + + for (int sample = start_sample; sample < end_sample; sample += step_samples) { + /* Setup and copy work tile to device. */ + wtile->start_sample = sample; + wtile->num_samples = min(step_samples, end_sample - sample); + work_tiles.copy_to_device(); + + CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer; + uint total_work_size = wtile->w * wtile->h * wtile->num_samples; + uint num_blocks = divide_up(total_work_size, num_threads_per_block); + + /* Launch kernel. 
*/ + void *args[] = {&d_work_tiles, &total_work_size}; + + cuda_assert( + cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); + + /* Run the adaptive sampling kernels at selected samples aligned to step samples. */ + uint filter_sample = sample + wtile->num_samples - 1; + if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) { + adaptive_sampling_filter(filter_sample, wtile, d_work_tiles); + } + + cuda_assert(cuCtxSynchronize()); + + /* Update progress. */ + rtile.sample = sample + wtile->num_samples; + task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples); + + if (task.get_cancel()) { + if (task.need_finish_queue == false) + break; + } + } + + /* Finalize adaptive sampling. */ + if (task.adaptive_sampling.use) { + CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer; + adaptive_sampling_post(rtile, wtile, d_work_tiles); + cuda_assert(cuCtxSynchronize()); + task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples); + } +} + +void CUDADevice::film_convert(DeviceTask &task, + device_ptr buffer, + device_ptr rgba_byte, + device_ptr rgba_half) +{ + if (have_error()) + return; + + CUDAContextScope scope(this); + + CUfunction cuFilmConvert; + CUdeviceptr d_rgba = map_pixels((rgba_byte) ? 
rgba_byte : rgba_half); + CUdeviceptr d_buffer = (CUdeviceptr)buffer; + + /* get kernel function */ + if (rgba_half) { + cuda_assert( + cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")); + } + else { + cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")); + } + + float sample_scale = 1.0f / (task.sample + 1); + + /* pass in parameters */ + void *args[] = {&d_rgba, + &d_buffer, + &sample_scale, + &task.x, + &task.y, + &task.w, + &task.h, + &task.offset, + &task.stride}; + + /* launch kernel */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute( + &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert)); + + int xthreads = (int)sqrt(threads_per_block); + int ythreads = (int)sqrt(threads_per_block); + int xblocks = (task.w + xthreads - 1) / xthreads; + int yblocks = (task.h + ythreads - 1) / ythreads; + + cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)); + + cuda_assert(cuLaunchKernel(cuFilmConvert, + xblocks, + yblocks, + 1, /* blocks */ + xthreads, + ythreads, + 1, /* threads */ + 0, + 0, + args, + 0)); + + unmap_pixels((rgba_byte) ? 
rgba_byte : rgba_half); + + cuda_assert(cuCtxSynchronize()); +} + +void CUDADevice::shader(DeviceTask &task) +{ + if (have_error()) + return; + + CUDAContextScope scope(this); + + CUfunction cuShader; + CUdeviceptr d_input = (CUdeviceptr)task.shader_input; + CUdeviceptr d_output = (CUdeviceptr)task.shader_output; + + /* get kernel function */ + if (task.shader_eval_type >= SHADER_EVAL_BAKE) { + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake")); + } + else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace")); + } + else { + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background")); + } + + /* do tasks in smaller chunks, so we can cancel it */ + const int shader_chunk_size = 65536; + const int start = task.shader_x; + const int end = task.shader_x + task.shader_w; + int offset = task.offset; + + bool canceled = false; + for (int sample = 0; sample < task.num_samples && !canceled; sample++) { + for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { + int shader_w = min(shader_chunk_size, end - shader_x); + + /* pass in parameters */ + void *args[8]; + int arg = 0; + args[arg++] = &d_input; + args[arg++] = &d_output; + args[arg++] = &task.shader_eval_type; + if (task.shader_eval_type >= SHADER_EVAL_BAKE) { + args[arg++] = &task.shader_filter; + } + args[arg++] = &shader_x; + args[arg++] = &shader_w; + args[arg++] = &offset; + args[arg++] = &sample; + + /* launch kernel */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute( + &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); + + int xblocks = (shader_w + threads_per_block - 1) / threads_per_block; + + cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuLaunchKernel(cuShader, + xblocks, + 1, + 1, /* blocks */ + threads_per_block, + 1, + 1, /* threads */ + 0, + 0, + args, + 0)); + + cuda_assert(cuCtxSynchronize()); 
+ + if (task.get_cancel()) { + canceled = true; + break; + } + } + + task.update_progress(NULL); + } +} + +CUdeviceptr CUDADevice::map_pixels(device_ptr mem) +{ + if (!background) { + PixelMem pmem = pixel_mem_map[mem]; + CUdeviceptr buffer; + + size_t bytes; + cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)); + cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)); + + return buffer; + } + + return (CUdeviceptr)mem; +} + +void CUDADevice::unmap_pixels(device_ptr mem) +{ + if (!background) { + PixelMem pmem = pixel_mem_map[mem]; + + cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)); + } +} + +void CUDADevice::pixels_alloc(device_memory &mem) +{ + PixelMem pmem; + + pmem.w = mem.data_width; + pmem.h = mem.data_height; + + CUDAContextScope scope(this); + + glGenBuffers(1, &pmem.cuPBO); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + if (mem.data_type == TYPE_HALF) + glBufferData( + GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW); + else + glBufferData( + GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glActiveTexture(GL_TEXTURE0); + glGenTextures(1, &pmem.cuTexId); + glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); + if (mem.data_type == TYPE_HALF) + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); + else + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); + + CUresult result = cuGraphicsGLRegisterBuffer( + &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); + + if (result == CUDA_SUCCESS) { + mem.device_pointer = pmem.cuTexId; + pixel_mem_map[mem.device_pointer] = pmem; + + mem.device_size = 
mem.memory_size(); + stats.mem_alloc(mem.device_size); + + return; + } + else { + /* failed to register buffer, fallback to no interop */ + glDeleteBuffers(1, &pmem.cuPBO); + glDeleteTextures(1, &pmem.cuTexId); + + background = true; + } +} + +void CUDADevice::pixels_copy_from(device_memory &mem, int y, int w, int h) +{ + PixelMem pmem = pixel_mem_map[mem.device_pointer]; + + CUDAContextScope scope(this); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); + size_t offset = sizeof(uchar) * 4 * y * w; + memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +} + +void CUDADevice::pixels_free(device_memory &mem) +{ + if (mem.device_pointer) { + PixelMem pmem = pixel_mem_map[mem.device_pointer]; + + CUDAContextScope scope(this); + + cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); + glDeleteBuffers(1, &pmem.cuPBO); + glDeleteTextures(1, &pmem.cuTexId); + + pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); + mem.device_pointer = 0; + + stats.mem_free(mem.device_size); + mem.device_size = 0; + } +} + +void CUDADevice::draw_pixels(device_memory &mem, + int y, + int w, + int h, + int width, + int height, + int dx, + int dy, + int dw, + int dh, + bool transparent, + const DeviceDrawParams &draw_params) +{ + assert(mem.type == MEM_PIXELS); + + if (!background) { + const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL); + PixelMem pmem = pixel_mem_map[mem.device_pointer]; + float *vpointer; + + CUDAContextScope scope(this); + + /* for multi devices, this assumes the inefficient method that we allocate + * all pixels on the device even though we only render to a subset */ + size_t offset = 4 * y * w; + + if (mem.data_type == TYPE_HALF) + offset *= sizeof(GLhalf); + else + offset *= sizeof(uint8_t); + + 
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); + if (mem.data_type == TYPE_HALF) { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset); + } + else { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset); + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + if (transparent) { + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } + + GLint shader_program; + if (use_fallback_shader) { + if (!bind_fallback_display_space_shader(dw, dh)) { + return; + } + shader_program = fallback_shader_program; + } + else { + draw_params.bind_display_space_shader_cb(); + glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program); + } + + if (!vertex_buffer) { + glGenBuffers(1, &vertex_buffer); + } + + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + /* invalidate old contents - + * avoids stalling if buffer is still waiting in queue to be rendered */ + glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); + + vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + + if (vpointer) { + /* texture coordinate - vertex pair */ + vpointer[0] = 0.0f; + vpointer[1] = 0.0f; + vpointer[2] = dx; + vpointer[3] = dy; + + vpointer[4] = (float)w / (float)pmem.w; + vpointer[5] = 0.0f; + vpointer[6] = (float)width + dx; + vpointer[7] = dy; + + vpointer[8] = (float)w / (float)pmem.w; + vpointer[9] = (float)h / (float)pmem.h; + vpointer[10] = (float)width + dx; + vpointer[11] = (float)height + dy; + + vpointer[12] = 0.0f; + vpointer[13] = (float)h / (float)pmem.h; + vpointer[14] = dx; + vpointer[15] = (float)height + dy; + + glUnmapBuffer(GL_ARRAY_BUFFER); + } + + GLuint vertex_array_object; + GLuint position_attribute, texcoord_attribute; + + glGenVertexArrays(1, &vertex_array_object); + glBindVertexArray(vertex_array_object); + + texcoord_attribute = glGetAttribLocation(shader_program, "texCoord"); + 
position_attribute = glGetAttribLocation(shader_program, "pos"); + + glEnableVertexAttribArray(texcoord_attribute); + glEnableVertexAttribArray(position_attribute); + + glVertexAttribPointer( + texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); + glVertexAttribPointer(position_attribute, + 2, + GL_FLOAT, + GL_FALSE, + 4 * sizeof(float), + (const GLvoid *)(sizeof(float) * 2)); + + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + + if (use_fallback_shader) { + glUseProgram(0); + } + else { + draw_params.unbind_display_space_shader_cb(); + } + + if (transparent) { + glDisable(GL_BLEND); + } + + glBindTexture(GL_TEXTURE_2D, 0); + + return; + } + + Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params); +} + +void CUDADevice::thread_run(DeviceTask *task) +{ + CUDAContextScope scope(this); + + if (task->type == DeviceTask::RENDER) { + DeviceRequestedFeatures requested_features; + if (use_split_kernel()) { + if (split_kernel == NULL) { + split_kernel = new CUDASplitKernel(this); + split_kernel->load_kernels(requested_features); + } + } + + device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY); + + /* keep rendering tiles until done */ + RenderTile tile; + DenoisingTask denoising(this, *task); + + while (task->acquire_tile(this, tile, task->tile_types)) { + if (tile.task == RenderTile::PATH_TRACE) { + if (use_split_kernel()) { + device_only_memory<uchar> void_buffer(this, "void_buffer"); + split_kernel->path_trace(task, tile, void_buffer, void_buffer); + } + else { + path_trace(*task, tile, work_tiles); + } + } + else if (tile.task == RenderTile::DENOISE) { + tile.sample = tile.start_sample + tile.num_samples; + + denoise(tile, denoising); + + task->update_progress(&tile, tile.w * tile.h); + } + + task->release_tile(tile); + + if (task->get_cancel()) { + if (task->need_finish_queue == false) + break; + } + } + + work_tiles.free(); + } + else if (task->type == DeviceTask::SHADER) { + shader(*task); + + 
cuda_assert(cuCtxSynchronize()); + } + else if (task->type == DeviceTask::DENOISE_BUFFER) { + RenderTile tile; + tile.x = task->x; + tile.y = task->y; + tile.w = task->w; + tile.h = task->h; + tile.buffer = task->buffer; + tile.sample = task->sample + task->num_samples; + tile.num_samples = task->num_samples; + tile.start_sample = task->sample; + tile.offset = task->offset; + tile.stride = task->stride; + tile.buffers = task->buffers; + + DenoisingTask denoising(this, *task); + denoise(tile, denoising); + task->update_progress(&tile, tile.w * tile.h); + } +} + +class CUDADeviceTask : public DeviceTask { + public: + CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task) + { + run = function_bind(&CUDADevice::thread_run, device, this); + } +}; + +void CUDADevice::task_add(DeviceTask &task) +{ + CUDAContextScope scope(this); + + /* Load texture info. */ + load_texture_info(); + + /* Synchronize all memory copies before executing task. */ + cuda_assert(cuCtxSynchronize()); + + if (task.type == DeviceTask::FILM_CONVERT) { + /* must be done in main thread due to opengl access */ + film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); + } + else { + task_pool.push(new CUDADeviceTask(this, task)); + } +} + +void CUDADevice::task_wait() +{ + task_pool.wait(); +} + +void CUDADevice::task_cancel() +{ + task_pool.cancel(); +} + +/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class + * now that the definition of that class is complete + */ +# undef cuda_assert +# define cuda_assert(stmt) \ + { \ + CUresult result = stmt; \ +\ + if (result != CUDA_SUCCESS) { \ + string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \ + if (device->error_msg == "") \ + device->error_msg = message; \ + fprintf(stderr, "%s\n", message.c_str()); \ + /*cuda_abort();*/ \ + device->cuda_error_documentation(); \ + } \ + } \ + (void)0 + +/* CUDA context scope. 
*/ + +CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device) +{ + cuda_assert(cuCtxPushCurrent(device->cuContext)); +} + +CUDAContextScope::~CUDAContextScope() +{ + cuda_assert(cuCtxPopCurrent(NULL)); +} + +/* split kernel */ + +class CUDASplitKernelFunction : public SplitKernelFunction { + CUDADevice *device; + CUfunction func; + + public: + CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) + { + } + + /* enqueue the kernel, returns false if there is an error */ + bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/) + { + return enqueue(dim, NULL); + } + + /* enqueue the kernel, returns false if there is an error */ + bool enqueue(const KernelDimensions &dim, void *args[]) + { + if (device->have_error()) + return false; + + CUDAContextScope scope(device); + + /* we ignore dim.local_size for now, as this is faster */ + int threads_per_block; + cuda_assert( + cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); + + int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) / + threads_per_block; + + cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); + + cuda_assert(cuLaunchKernel(func, + xblocks, + 1, + 1, /* blocks */ + threads_per_block, + 1, + 1, /* threads */ + 0, + 0, + args, + 0)); + + return !device->have_error(); + } +}; + +CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device) +{ +} + +uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/, + device_memory & /*data*/, + size_t num_threads) +{ + CUDAContextScope scope(device); + + device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); + size_buffer.alloc(1); + size_buffer.zero_to_device(); + + uint threads = num_threads; + CUdeviceptr d_size = (CUdeviceptr)size_buffer.device_pointer; + + struct args_t { + uint *num_threads; + CUdeviceptr *size; + }; + + args_t args = 
{&threads, &d_size}; + + CUfunction state_buffer_size; + cuda_assert( + cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size")); + + cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0)); + + size_buffer.copy_from_device(0, 1, 1); + size_t size = size_buffer[0]; + size_buffer.free(); + + return size; +} + +bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim, + RenderTile &rtile, + int num_global_elements, + device_memory & /*kernel_globals*/, + device_memory & /*kernel_data*/, + device_memory &split_data, + device_memory &ray_state, + device_memory &queue_index, + device_memory &use_queues_flag, + device_memory &work_pool_wgs) +{ + CUDAContextScope scope(device); + + CUdeviceptr d_split_data = (CUdeviceptr)split_data.device_pointer; + CUdeviceptr d_ray_state = (CUdeviceptr)ray_state.device_pointer; + CUdeviceptr d_queue_index = (CUdeviceptr)queue_index.device_pointer; + CUdeviceptr d_use_queues_flag = (CUdeviceptr)use_queues_flag.device_pointer; + CUdeviceptr d_work_pool_wgs = (CUdeviceptr)work_pool_wgs.device_pointer; + + CUdeviceptr d_buffer = (CUdeviceptr)rtile.buffer; + + int end_sample = rtile.start_sample + rtile.num_samples; + int queue_size = dim.global_size[0] * dim.global_size[1]; + + struct args_t { + CUdeviceptr *split_data_buffer; + int *num_elements; + CUdeviceptr *ray_state; + int *start_sample; + int *end_sample; + int *sx; + int *sy; + int *sw; + int *sh; + int *offset; + int *stride; + CUdeviceptr *queue_index; + int *queuesize; + CUdeviceptr *use_queues_flag; + CUdeviceptr *work_pool_wgs; + int *num_samples; + CUdeviceptr *buffer; + }; + + args_t args = {&d_split_data, + &num_global_elements, + &d_ray_state, + &rtile.start_sample, + &end_sample, + &rtile.x, + &rtile.y, + &rtile.w, + &rtile.h, + &rtile.offset, + &rtile.stride, + &d_queue_index, + &queue_size, + &d_use_queues_flag, + &d_work_pool_wgs, + &rtile.num_samples, + &d_buffer}; + + 
CUfunction data_init; + cuda_assert( + cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init")); + if (device->have_error()) { + return false; + } + + CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args); + + return !device->have_error(); +} + +SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name, + const DeviceRequestedFeatures &) +{ + CUDAContextScope scope(device); + CUfunction func; + + cuda_assert( + cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data())); + if (device->have_error()) { + device->cuda_error_message( + string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data())); + return NULL; + } + + return new CUDASplitKernelFunction(device, func); +} + +int2 CUDASplitKernel::split_kernel_local_size() +{ + return make_int2(32, 1); +} + +int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg, + device_memory &data, + DeviceTask * /*task*/) +{ + CUDAContextScope scope(device); + size_t free; + size_t total; + + cuda_assert(cuMemGetInfo(&free, &total)); + + VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free) + << " bytes. 
(" << string_human_readable_size(free) << ")."; + + size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2); + size_t side = round_down((int)sqrt(num_elements), 32); + int2 global_size = make_int2(side, round_down(num_elements / side, 16)); + VLOG(1) << "Global size: " << global_size << "."; + return global_size; +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 76670351734..d94d409175b 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -25,11 +25,11 @@ #include "util/util_logging.h" #include "util/util_math.h" #include "util/util_opengl.h" -#include "util/util_time.h" +#include "util/util_string.h" #include "util/util_system.h" +#include "util/util_time.h" #include "util/util_types.h" #include "util/util_vector.h" -#include "util/util_string.h" CCL_NAMESPACE_BEGIN @@ -366,6 +366,15 @@ void Device::draw_pixels(device_memory &rgba, Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) { +#ifdef WITH_MULTI + if (!info.multi_devices.empty()) { + /* Always create a multi device when info contains multiple devices. + * This is done so that the type can still be e.g. DEVICE_CPU to indicate + * that it is a homogeneous collection of devices, which simplifies checks. 
*/ + return device_multi_create(info, stats, profiler, background); + } +#endif + Device *device; switch (info.type) { @@ -388,11 +397,6 @@ Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool device = NULL; break; #endif -#ifdef WITH_MULTI - case DEVICE_MULTI: - device = device_multi_create(info, stats, profiler, background); - break; -#endif #ifdef WITH_NETWORK case DEVICE_NETWORK: device = device_network_create(info, stats, profiler, "127.0.0.1"); @@ -586,7 +590,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices, } DeviceInfo info; - info.type = DEVICE_MULTI; + info.type = subdevices.front().type; info.id = "MULTI"; info.description = "Multi Device"; info.num = 0; @@ -624,6 +628,14 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices, info.multi_devices.push_back(device); } + /* Create unique ID for this combination of devices. */ + info.id += device.id; + + /* Set device type to MULTI if subdevices are not of a common type. */ + if (device.type != info.type) { + info.type = DEVICE_MULTI; + } + /* Accumulate device info. */ info.has_half_images &= device.has_half_images; info.has_volume_decoupled &= device.has_volume_decoupled; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 66fcac921d3..a98ac171709 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -27,8 +27,8 @@ #include "util/util_list.h" #include "util/util_stats.h" #include "util/util_string.h" -#include "util/util_thread.h" #include "util/util_texture.h" +#include "util/util_thread.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -83,6 +83,7 @@ class DeviceInfo { bool has_profiling; /* Supports runtime collection of profiling info. 
*/ int cpu_threads; vector<DeviceInfo> multi_devices; + vector<DeviceInfo> denoising_devices; DeviceInfo() { diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c2843a61e6d..57e8523e02a 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -29,16 +29,19 @@ #include "device/device_intern.h" #include "device/device_split_kernel.h" +// clang-format off #include "kernel/kernel.h" #include "kernel/kernel_compat_cpu.h" #include "kernel/kernel_types.h" #include "kernel/split/kernel_split_data.h" #include "kernel/kernel_globals.h" +#include "kernel/kernel_adaptive_sampling.h" #include "kernel/filter/filter.h" #include "kernel/osl/osl_shader.h" #include "kernel/osl/osl_globals.h" +// clang-format on #include "render/buffers.h" #include "render/coverage.h" @@ -261,7 +264,7 @@ class CPUDevice : public Device { CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) : Device(info_, stats_, profiler_, background_), - texture_info(this, "__texture_info", MEM_TEXTURE), + texture_info(this, "__texture_info", MEM_GLOBAL), #define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name)) REGISTER_KERNEL(path_trace), REGISTER_KERNEL(convert_to_half_float), @@ -317,6 +320,10 @@ class CPUDevice : public Device { REGISTER_SPLIT_KERNEL(next_iteration_setup); REGISTER_SPLIT_KERNEL(indirect_subsurface); REGISTER_SPLIT_KERNEL(buffer_update); + REGISTER_SPLIT_KERNEL(adaptive_stopping); + REGISTER_SPLIT_KERNEL(adaptive_filter_x); + REGISTER_SPLIT_KERNEL(adaptive_filter_y); + REGISTER_SPLIT_KERNEL(adaptive_adjust_samples); #undef REGISTER_SPLIT_KERNEL #undef KERNEL_FUNCTIONS } @@ -338,7 +345,10 @@ class CPUDevice : public Device { if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) { bvh_layout_mask |= BVH_LAYOUT_BVH4; } -#if defined(__x86_64__) || defined(_M_X64) + /* MSVC does not support the -march=native switch and you always end up */ + /* with an sse2 kernel when you 
use WITH_KERNEL_NATIVE. We *cannot* feed */ + /* that kernel BVH8 even if the CPU flags would allow for it. */ +#if (defined(__x86_64__) || defined(_M_X64)) && !(defined(_MSC_VER) && defined(WITH_KERNEL_NATIVE)) if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) { bvh_layout_mask |= BVH_LAYOUT_BVH8; } @@ -362,6 +372,9 @@ class CPUDevice : public Device { if (mem.type == MEM_TEXTURE) { assert(!"mem_alloc not supported for textures."); } + else if (mem.type == MEM_GLOBAL) { + assert(!"mem_alloc not supported for global memory."); + } else { if (mem.name) { VLOG(1) << "Buffer allocate: " << mem.name << ", " @@ -386,9 +399,13 @@ class CPUDevice : public Device { void mem_copy_to(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); } else if (mem.type == MEM_PIXELS) { assert(!"mem_copy_to not supported for pixels."); @@ -420,8 +437,11 @@ class CPUDevice : public Device { void mem_free(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); } else if (mem.device_pointer) { if (mem.type == MEM_DEVICE_ONLY) { @@ -443,51 +463,50 @@ class CPUDevice : public Device { kernel_const_copy(&kernel_globals, name, host, size); } - void tex_alloc(device_memory &mem) + void global_alloc(device_memory &mem) { - VLOG(1) << "Texture allocate: " << mem.name << ", " + VLOG(1) << "Global memory allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; - if (mem.interpolation == INTERPOLATION_NONE) { - /* Data texture. 
*/ - kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size); - } - else { - /* Image Texture. */ - int flat_slot = 0; - if (string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - flat_slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - - if (flat_slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount - * of re-allocations. */ - texture_info.resize(flat_slot + 128); - } + kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size); - TextureInfo &info = texture_info[flat_slot]; - info.data = (uint64_t)mem.host_pointer; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; + mem.device_pointer = (device_ptr)mem.host_pointer; + mem.device_size = mem.memory_size(); + stats.mem_alloc(mem.device_size); + } - need_texture_info = true; + void global_free(device_memory &mem) + { + if (mem.device_pointer) { + mem.device_pointer = 0; + stats.mem_free(mem.device_size); + mem.device_size = 0; } + } + + void tex_alloc(device_texture &mem) + { + VLOG(1) << "Texture allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; mem.device_pointer = (device_ptr)mem.host_pointer; mem.device_size = mem.memory_size(); stats.mem_alloc(mem.device_size); + + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount of re-allocations. 
*/ + texture_info.resize(slot + 128); + } + + texture_info[slot] = mem.info; + texture_info[slot].data = (uint64_t)mem.host_pointer; + need_texture_info = true; } - void tex_free(device_memory &mem) + void tex_free(device_texture &mem) { if (mem.device_pointer) { mem.device_pointer = 0; @@ -508,13 +527,14 @@ class CPUDevice : public Device { void thread_run(DeviceTask *task) { - if (task->type == DeviceTask::RENDER) { + if (task->type == DeviceTask::RENDER) thread_render(*task); - } - else if (task->type == DeviceTask::FILM_CONVERT) - thread_film_convert(*task); else if (task->type == DeviceTask::SHADER) thread_shader(*task); + else if (task->type == DeviceTask::FILM_CONVERT) + thread_film_convert(*task); + else if (task->type == DeviceTask::DENOISE_BUFFER) + thread_denoise(*task); } class CPUDeviceTask : public DeviceTask { @@ -819,6 +839,49 @@ class CPUDevice : public Device { return true; } + bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile) + { + WorkTile wtile; + wtile.x = tile.x; + wtile.y = tile.y; + wtile.w = tile.w; + wtile.h = tile.h; + wtile.offset = tile.offset; + wtile.stride = tile.stride; + wtile.buffer = (float *)tile.buffer; + + bool any = false; + for (int y = tile.y; y < tile.y + tile.h; ++y) { + any |= kernel_do_adaptive_filter_x(kg, y, &wtile); + } + for (int x = tile.x; x < tile.x + tile.w; ++x) { + any |= kernel_do_adaptive_filter_y(kg, x, &wtile); + } + return (!any); + } + + void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg) + { + float *render_buffer = (float *)tile.buffer; + for (int y = tile.y; y < tile.y + tile.h; y++) { + for (int x = tile.x; x < tile.x + tile.w; x++) { + int index = tile.offset + x + y * tile.stride; + ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; + if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { + buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; + float sample_multiplier = tile.sample / 
max((float)tile.start_sample + 1.0f, + buffer[kernel_data.film.pass_sample_count]); + if (sample_multiplier != 1.0f) { + kernel_adaptive_post_adjust(kg, buffer, sample_multiplier); + } + } + else { + kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f)); + } + } + } + } + void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) { const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; @@ -851,14 +914,27 @@ class CPUDevice : public Device { path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); } } - tile.sample = sample + 1; + if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) { + const bool stop = adaptive_sampling_filter(kg, tile); + if (stop) { + const int num_progress_samples = end_sample - sample; + tile.sample = end_sample; + task.update_progress(&tile, tile.w * tile.h * num_progress_samples); + break; + } + } + task.update_progress(&tile, tile.w * tile.h); } if (use_coverage) { coverage.finalize(); } + + if (task.adaptive_sampling.use) { + adaptive_sampling_post(tile, kg); + } } void denoise(DenoisingTask &denoising, RenderTile &tile) @@ -923,7 +999,7 @@ class CPUDevice : public Device { DenoisingTask denoising(this, task); denoising.profiler = &kg->profiler; - while (task.acquire_tile(this, tile)) { + while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { if (use_split_kernel) { device_only_memory<uchar> void_buffer(this, "void_buffer"); @@ -954,6 +1030,33 @@ class CPUDevice : public Device { delete split_kernel; } + void thread_denoise(DeviceTask &task) + { + RenderTile tile; + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.sample = task.sample + task.num_samples; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + 
DenoisingTask denoising(this, task); + + ProfilingState denoising_profiler_state; + profiler.add_state(&denoising_profiler_state); + denoising.profiler = &denoising_profiler_state; + + denoise(denoising, tile); + task.update_progress(&tile, tile.w * tile.h); + + profiler.remove_state(&denoising_profiler_state); + } + void thread_film_convert(DeviceTask &task) { float sample_scale = 1.0f / (task.sample + 1); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index dfd80d678fd..9a703b45c0a 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -14,2562 +14,21 @@ * limitations under the License. */ -#include <climits> -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +#ifdef WITH_CUDA -#include "device/device.h" -#include "device/device_denoising.h" -#include "device/device_intern.h" -#include "device/device_split_kernel.h" +# include "device/cuda/device_cuda.h" +# include "device/device.h" +# include "device/device_intern.h" -#include "render/buffers.h" - -#include "kernel/filter/filter_defines.h" - -#ifdef WITH_CUDA_DYNLOAD -# include "cuew.h" -#else -# include "util/util_opengl.h" -# include <cuda.h> -# include <cudaGL.h> -#endif -#include "util/util_debug.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_map.h" -#include "util/util_md5.h" -#include "util/util_opengl.h" -#include "util/util_path.h" -#include "util/util_string.h" -#include "util/util_system.h" -#include "util/util_types.h" -#include "util/util_time.h" -#include "util/util_windows.h" - -#include "kernel/split/kernel_split_data_types.h" +# include "util/util_logging.h" +# include "util/util_string.h" +# include "util/util_windows.h" CCL_NAMESPACE_BEGIN -#ifndef WITH_CUDA_DYNLOAD - -/* Transparently implement some functions, so majority of the file does not need - * to worry about difference between dynamically loaded and linked CUDA at all. 
- */ - -namespace { - -const char *cuewErrorString(CUresult result) -{ - /* We can only give error code here without major code duplication, that - * should be enough since dynamic loading is only being disabled by folks - * who knows what they're doing anyway. - * - * NOTE: Avoid call from several threads. - */ - static string error; - error = string_printf("%d", result); - return error.c_str(); -} - -const char *cuewCompilerPath() -{ - return CYCLES_CUDA_NVCC_EXECUTABLE; -} - -int cuewCompilerVersion() -{ - return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10); -} - -} /* namespace */ -#endif /* WITH_CUDA_DYNLOAD */ - -class CUDADevice; - -class CUDASplitKernel : public DeviceSplitKernel { - CUDADevice *device; - - public: - explicit CUDASplitKernel(CUDADevice *device); - - virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads); - - virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, - RenderTile &rtile, - int num_global_elements, - device_memory &kernel_globals, - device_memory &kernel_data_, - device_memory &split_data, - device_memory &ray_state, - device_memory &queue_index, - device_memory &use_queues_flag, - device_memory &work_pool_wgs); - - virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name, - const DeviceRequestedFeatures &); - virtual int2 split_kernel_local_size(); - virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task); -}; - -/* Utility to push/pop CUDA context. 
*/ -class CUDAContextScope { - public: - CUDAContextScope(CUDADevice *device); - ~CUDAContextScope(); - - private: - CUDADevice *device; -}; - -class CUDADevice : public Device { - public: - DedicatedTaskPool task_pool; - CUdevice cuDevice; - CUcontext cuContext; - CUmodule cuModule, cuFilterModule; - size_t device_texture_headroom; - size_t device_working_headroom; - bool move_texture_to_host; - size_t map_host_used; - size_t map_host_limit; - int can_map_host; - int cuDevId; - int cuDevArchitecture; - bool first_error; - CUDASplitKernel *split_kernel; - - struct CUDAMem { - CUDAMem() : texobject(0), array(0), use_mapped_host(false) - { - } - - CUtexObject texobject; - CUarray array; - - /* If true, a mapped host memory in shared_pointer is being used. */ - bool use_mapped_host; - }; - typedef map<device_memory *, CUDAMem> CUDAMemMap; - CUDAMemMap cuda_mem_map; - - struct PixelMem { - GLuint cuPBO; - CUgraphicsResource cuPBOresource; - GLuint cuTexId; - int w, h; - }; - map<device_ptr, PixelMem> pixel_mem_map; - - /* Bindless Textures */ - device_vector<TextureInfo> texture_info; - bool need_texture_info; - - CUdeviceptr cuda_device_ptr(device_ptr mem) - { - return (CUdeviceptr)mem; - } - - static bool have_precompiled_kernels() - { - string cubins_path = path_get("lib"); - return path_exists(cubins_path); - } - - virtual bool show_samples() const - { - /* The CUDADevice only processes one tile at a time, so showing samples is fine. 
*/ - return true; - } - - virtual BVHLayoutMask get_bvh_layout_mask() const - { - return BVH_LAYOUT_BVH2; - } - - /*#ifdef NDEBUG -#define cuda_abort() -#else -#define cuda_abort() abort() -#endif*/ - void cuda_error_documentation() - { - if (first_error) { - fprintf(stderr, - "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n"); - fprintf(stderr, - "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n"); - first_error = false; - } - } - -#define cuda_assert(stmt) \ - { \ - CUresult result = stmt; \ -\ - if (result != CUDA_SUCCESS) { \ - string message = string_printf( \ - "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \ - if (error_msg == "") \ - error_msg = message; \ - fprintf(stderr, "%s\n", message.c_str()); \ - /*cuda_abort();*/ \ - cuda_error_documentation(); \ - } \ - } \ - (void)0 - - bool cuda_error_(CUresult result, const string &stmt) - { - if (result == CUDA_SUCCESS) - return false; - - string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result)); - if (error_msg == "") - error_msg = message; - fprintf(stderr, "%s\n", message.c_str()); - cuda_error_documentation(); - return true; - } - -#define cuda_error(stmt) cuda_error_(stmt, #stmt) - - void cuda_error_message(const string &message) - { - if (error_msg == "") - error_msg = message; - fprintf(stderr, "%s\n", message.c_str()); - cuda_error_documentation(); - } - - CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) - : Device(info, stats, profiler, background_), - texture_info(this, "__texture_info", MEM_TEXTURE) - { - first_error = true; - background = background_; - - cuDevId = info.num; - cuDevice = 0; - cuContext = 0; - - cuModule = 0; - cuFilterModule = 0; - - split_kernel = NULL; - - need_texture_info = false; - - device_texture_headroom = 0; - device_working_headroom = 0; - move_texture_to_host = false; - map_host_limit = 0; - map_host_used = 0; - 
can_map_host = 0; - - /* Intialize CUDA. */ - if (cuda_error(cuInit(0))) - return; - - /* Setup device and context. */ - if (cuda_error(cuDeviceGet(&cuDevice, cuDevId))) - return; - - /* CU_CTX_MAP_HOST for mapping host memory when out of device memory. - * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, - * so we can predict which memory to map to host. */ - cuda_assert( - cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice)); - - unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX; - if (can_map_host) { - ctx_flags |= CU_CTX_MAP_HOST; - init_host_memory(); - } - - /* Create context. */ - CUresult result; - - if (background) { - result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); - } - else { - result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice); - - if (result != CUDA_SUCCESS) { - result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); - background = true; - } - } - - if (cuda_error_(result, "cuCtxCreate")) - return; - - int major, minor; - cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); - cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); - cuDevArchitecture = major * 100 + minor * 10; - - /* Pop context set by cuCtxCreate. 
*/ - cuCtxPopCurrent(NULL); - } - - ~CUDADevice() - { - task_pool.stop(); - - delete split_kernel; - - texture_info.free(); - - cuda_assert(cuCtxDestroy(cuContext)); - } - - bool support_device(const DeviceRequestedFeatures & /*requested_features*/) - { - int major, minor; - cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); - cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); - - /* We only support sm_30 and above */ - if (major < 3) { - cuda_error_message(string_printf( - "CUDA device supported only with compute capability 3.0 or up, found %d.%d.", - major, - minor)); - return false; - } - - return true; - } - - bool use_adaptive_compilation() - { - return DebugFlags().cuda.adaptive_compile; - } - - bool use_split_kernel() - { - return DebugFlags().cuda.split_kernel; - } - - /* Common NVCC flags which stays the same regardless of shading model, - * kernel sources md5 and only depends on compiler or compilation settings. - */ - string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features, - bool filter = false, - bool split = false) - { - const int machine = system_cpu_bits(); - const string source_path = path_get("source"); - const string include_path = source_path; - string cflags = string_printf( - "-m%d " - "--ptxas-options=\"-v\" " - "--use_fast_math " - "-DNVCC " - "-I\"%s\"", - machine, - include_path.c_str()); - if (!filter && use_adaptive_compilation()) { - cflags += " " + requested_features.get_build_options(); - } - const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); - if (extra_cflags) { - cflags += string(" ") + string(extra_cflags); - } -#ifdef WITH_CYCLES_DEBUG - cflags += " -D__KERNEL_DEBUG__"; -#endif - - if (split) { - cflags += " -D__SPLIT__"; - } - - return cflags; - } - - bool compile_check_compiler() - { - const char *nvcc = cuewCompilerPath(); - if (nvcc == NULL) { - cuda_error_message( - "CUDA nvcc compiler not found. 
" - "Install CUDA toolkit in default location."); - return false; - } - const int cuda_version = cuewCompilerVersion(); - VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << "."; - const int major = cuda_version / 10, minor = cuda_version % 10; - if (cuda_version == 0) { - cuda_error_message("CUDA nvcc compiler version could not be parsed."); - return false; - } - if (cuda_version < 80) { - printf( - "Unsupported CUDA version %d.%d detected, " - "you need CUDA 8.0 or newer.\n", - major, - minor); - return false; - } - else if (cuda_version != 101) { - printf( - "CUDA version %d.%d detected, build may succeed but only " - "CUDA 10.1 is officially supported.\n", - major, - minor); - } - return true; - } - - string compile_kernel(const DeviceRequestedFeatures &requested_features, - bool filter = false, - bool split = false) - { - const char *name, *source; - if (filter) { - name = "filter"; - source = "filter.cu"; - } - else if (split) { - name = "kernel_split"; - source = "kernel_split.cu"; - } - else { - name = "kernel"; - source = "kernel.cu"; - } - /* Compute cubin name. */ - int major, minor; - cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); - cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); - - /* Attempt to use kernel provided with Blender. 
*/ - if (!use_adaptive_compilation()) { - const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; - if (path_exists(cubin)) { - VLOG(1) << "Using precompiled kernel."; - return cubin; - } - const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); - VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; - if (path_exists(ptx)) { - VLOG(1) << "Using precompiled kernel."; - return ptx; - } - } - - const string common_cflags = compile_kernel_get_common_cflags( - requested_features, filter, split); - - /* Try to use locally compiled kernel. */ - const string source_path = path_get("source"); - const string kernel_md5 = path_files_md5_hash(source_path); - - /* We include cflags into md5 so changing cuda toolkit or changing other - * compiler command line arguments makes sure cubin gets re-built. - */ - const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags); - - const string cubin_file = string_printf( - "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str()); - const string cubin = path_cache_get(path_join("kernels", cubin_file)); - VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; - if (path_exists(cubin)) { - VLOG(1) << "Using locally compiled kernel."; - return cubin; - } - -#ifdef _WIN32 - if (have_precompiled_kernels()) { - if (major < 3) { - cuda_error_message( - string_printf("CUDA device requires compute capability 3.0 or up, " - "found %d.%d. Your GPU is not supported.", - major, - minor)); - } - else { - cuda_error_message( - string_printf("CUDA binary kernel for this graphics card compute " - "capability (%d.%d) not found.", - major, - minor)); - } - return ""; - } -#endif - - /* Compile. 
*/ - if (!compile_check_compiler()) { - return ""; - } - const char *nvcc = cuewCompilerPath(); - const string kernel = path_join(path_join(source_path, "kernel"), - path_join("kernels", path_join("cuda", source))); - double starttime = time_dt(); - printf("Compiling CUDA kernel ...\n"); - - path_create_directories(cubin); - - string command = string_printf( - "\"%s\" " - "-arch=sm_%d%d " - "--cubin \"%s\" " - "-o \"%s\" " - "%s ", - nvcc, - major, - minor, - kernel.c_str(), - cubin.c_str(), - common_cflags.c_str()); - - printf("%s\n", command.c_str()); - - if (system(command.c_str()) == -1) { - cuda_error_message( - "Failed to execute compilation command, " - "see console for details."); - return ""; - } - - /* Verify if compilation succeeded */ - if (!path_exists(cubin)) { - cuda_error_message( - "CUDA kernel compilation failed, " - "see console for details."); - return ""; - } - - printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); - - return cubin; - } - - bool load_kernels(const DeviceRequestedFeatures &requested_features) - { - /* TODO(sergey): Support kernels re-load for CUDA devices. - * - * Currently re-loading kernel will invalidate memory pointers, - * causing problems in cuCtxSynchronize. 
- */ - if (cuFilterModule && cuModule) { - VLOG(1) << "Skipping kernel reload, not currently supported."; - return true; - } - - /* check if cuda init succeeded */ - if (cuContext == 0) - return false; - - /* check if GPU is supported */ - if (!support_device(requested_features)) - return false; - - /* get kernel */ - string cubin = compile_kernel(requested_features, false, use_split_kernel()); - if (cubin == "") - return false; - - string filter_cubin = compile_kernel(requested_features, true, false); - if (filter_cubin == "") - return false; - - /* open module */ - CUDAContextScope scope(this); - - string cubin_data; - CUresult result; - - if (path_read_text(cubin, cubin_data)) - result = cuModuleLoadData(&cuModule, cubin_data.c_str()); - else - result = CUDA_ERROR_FILE_NOT_FOUND; - - if (cuda_error_(result, "cuModuleLoad")) - cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str())); - - if (path_read_text(filter_cubin, cubin_data)) - result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str()); - else - result = CUDA_ERROR_FILE_NOT_FOUND; - - if (cuda_error_(result, "cuModuleLoad")) - cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str())); - - if (result == CUDA_SUCCESS) { - reserve_local_memory(requested_features); - } - - return (result == CUDA_SUCCESS); - } - - void reserve_local_memory(const DeviceRequestedFeatures &requested_features) - { - if (use_split_kernel()) { - /* Split kernel mostly uses global memory and adaptive compilation, - * difficult to predict how much is needed currently. */ - return; - } - - /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory - * needed for kernel launches, so that we can reliably figure out when - * to allocate scene data in mapped host memory. */ - CUDAContextScope scope(this); - - size_t total = 0, free_before = 0, free_after = 0; - cuMemGetInfo(&free_before, &total); - - /* Get kernel function. 
*/ - CUfunction cuPathTrace; - - if (requested_features.use_integrator_branched) { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); - } - else { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); - } - - cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); - - int min_blocks, num_threads_per_block; - cuda_assert(cuOccupancyMaxPotentialBlockSize( - &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); - - /* Launch kernel, using just 1 block appears sufficient to reserve - * memory for all multiprocessors. It would be good to do this in - * parallel for the multi GPU case still to make it faster. */ - CUdeviceptr d_work_tiles = 0; - uint total_work_size = 0; - - void *args[] = {&d_work_tiles, &total_work_size}; - - cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); - - cuda_assert(cuCtxSynchronize()); - - cuMemGetInfo(&free_after, &total); - VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after) - << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; - -#if 0 - /* For testing mapped host memory, fill up device memory. */ - const size_t keep_mb = 1024; - - while (free_after > keep_mb * 1024 * 1024LL) { - CUdeviceptr tmp; - cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL)); - cuMemGetInfo(&free_after, &total); - } -#endif - } - - void init_host_memory() - { - /* Limit amount of host mapped memory, because allocating too much can - * cause system instability. Leave at least half or 4 GB of system - * memory free, whichever is smaller. 
*/ - size_t default_limit = 4 * 1024 * 1024 * 1024LL; - size_t system_ram = system_physical_ram(); - - if (system_ram > 0) { - if (system_ram / 2 > default_limit) { - map_host_limit = system_ram - default_limit; - } - else { - map_host_limit = system_ram / 2; - } - } - else { - VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; - map_host_limit = 0; - } - - /* Amount of device memory to keep is free after texture memory - * and working memory allocations respectively. We set the working - * memory limit headroom lower so that some space is left after all - * texture memory allocations. */ - device_working_headroom = 32 * 1024 * 1024LL; // 32MB - device_texture_headroom = 128 * 1024 * 1024LL; // 128MB - - VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit) - << " bytes. (" << string_human_readable_size(map_host_limit) << ")"; - } - - void load_texture_info() - { - if (need_texture_info) { - texture_info.copy_to_device(); - need_texture_info = false; - } - } - - void move_textures_to_host(size_t size, bool for_texture) - { - /* Signal to reallocate textures in host memory only. */ - move_texture_to_host = true; - - while (size > 0) { - /* Find suitable memory allocation to move. */ - device_memory *max_mem = NULL; - size_t max_size = 0; - bool max_is_image = false; - - foreach (CUDAMemMap::value_type &pair, cuda_mem_map) { - device_memory &mem = *pair.first; - CUDAMem *cmem = &pair.second; - - bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); - bool is_image = is_texture && (mem.data_height > 1); - - /* Can't move this type of memory. */ - if (!is_texture || cmem->array) { - continue; - } - - /* Already in host memory. */ - if (cmem->use_mapped_host) { - continue; - } - - /* For other textures, only move image textures. */ - if (for_texture && !is_image) { - continue; - } - - /* Try to move largest allocation, prefer moving images. 
*/ - if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) { - max_is_image = is_image; - max_size = mem.device_size; - max_mem = &mem; - } - } - - /* Move to host memory. This part is mutex protected since - * multiple CUDA devices could be moving the memory. The - * first one will do it, and the rest will adopt the pointer. */ - if (max_mem) { - VLOG(1) << "Move memory from device to host: " << max_mem->name; - - static thread_mutex move_mutex; - thread_scoped_lock lock(move_mutex); - - /* Preserve the original device pointer, in case of multi device - * we can't change it because the pointer mapping would break. */ - device_ptr prev_pointer = max_mem->device_pointer; - size_t prev_size = max_mem->device_size; - - tex_free(*max_mem); - tex_alloc(*max_mem); - size = (max_size >= size) ? 0 : size - max_size; - - max_mem->device_pointer = prev_pointer; - max_mem->device_size = prev_size; - } - else { - break; - } - } - - /* Update texture info array with new pointers. */ - load_texture_info(); - - move_texture_to_host = false; - } - - CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0) - { - CUDAContextScope scope(this); - - CUdeviceptr device_pointer = 0; - size_t size = mem.memory_size() + pitch_padding; - - CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY; - const char *status = ""; - - /* First try allocating in device memory, respecting headroom. We make - * an exception for texture info. It is small and frequently accessed, - * so treat it as working memory. - * - * If there is not enough room for working memory, we will try to move - * textures to host memory, assuming the performance impact would have - * been worse for working memory. */ - bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); - bool is_image = is_texture && (mem.data_height > 1); - - size_t headroom = (is_texture) ? 
device_texture_headroom : device_working_headroom; - - size_t total = 0, free = 0; - cuMemGetInfo(&free, &total); - - /* Move textures to host memory if needed. */ - if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) { - move_textures_to_host(size + headroom - free, is_texture); - cuMemGetInfo(&free, &total); - } - - /* Allocate in device memory. */ - if (!move_texture_to_host && (size + headroom) < free) { - mem_alloc_result = cuMemAlloc(&device_pointer, size); - if (mem_alloc_result == CUDA_SUCCESS) { - status = " in device memory"; - } - } - - /* Fall back to mapped host memory if needed and possible. */ - - void *shared_pointer = 0; - - if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { - if (mem.shared_pointer) { - /* Another device already allocated host memory. */ - mem_alloc_result = CUDA_SUCCESS; - shared_pointer = mem.shared_pointer; - } - else if (map_host_used + size < map_host_limit) { - /* Allocate host memory ourselves. */ - mem_alloc_result = cuMemHostAlloc( - &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED); - - assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) || - (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0)); - } - - if (mem_alloc_result == CUDA_SUCCESS) { - cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0)); - map_host_used += size; - status = " in host memory"; - } - else { - status = " failed, out of host memory"; - } - } - - if (mem_alloc_result != CUDA_SUCCESS) { - status = " failed, out of device and host memory"; - cuda_assert(mem_alloc_result); - } - - if (mem.name) { - VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. 
(" - << string_human_readable_size(mem.memory_size()) << ")" << status; - } - - mem.device_pointer = (device_ptr)device_pointer; - mem.device_size = size; - stats.mem_alloc(size); - - if (!mem.device_pointer) { - return NULL; - } - - /* Insert into map of allocations. */ - CUDAMem *cmem = &cuda_mem_map[&mem]; - if (shared_pointer != 0) { - /* Replace host pointer with our host allocation. Only works if - * CUDA memory layout is the same and has no pitch padding. Also - * does not work if we move textures to host during a render, - * since other devices might be using the memory. */ - - if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer && - mem.host_pointer != shared_pointer) { - memcpy(shared_pointer, mem.host_pointer, size); - - /* A Call to device_memory::host_free() should be preceded by - * a call to device_memory::device_free() for host memory - * allocated by a device to be handled properly. Two exceptions - * are here and a call in OptiXDevice::generic_alloc(), where - * the current host memory can be assumed to be allocated by - * device_memory::host_alloc(), not by a device */ - - mem.host_free(); - mem.host_pointer = shared_pointer; - } - mem.shared_pointer = shared_pointer; - mem.shared_counter++; - cmem->use_mapped_host = true; - } - else { - cmem->use_mapped_host = false; - } - - return cmem; - } - - void generic_copy_to(device_memory &mem) - { - if (mem.host_pointer && mem.device_pointer) { - CUDAContextScope scope(this); - - /* If use_mapped_host of mem is false, the current device only - * uses device memory allocated by cuMemAlloc regardless of - * mem.host_pointer and mem.shared_pointer, and should copy - * data from mem.host_pointer. 
*/ - - if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) { - cuda_assert(cuMemcpyHtoD( - cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size())); - } - } - } - - void generic_free(device_memory &mem) - { - if (mem.device_pointer) { - CUDAContextScope scope(this); - const CUDAMem &cmem = cuda_mem_map[&mem]; - - /* If cmem.use_mapped_host is true, reference counting is used - * to safely free a mapped host memory. */ - - if (cmem.use_mapped_host) { - assert(mem.shared_pointer); - if (mem.shared_pointer) { - assert(mem.shared_counter > 0); - if (--mem.shared_counter == 0) { - if (mem.host_pointer == mem.shared_pointer) { - mem.host_pointer = 0; - } - cuMemFreeHost(mem.shared_pointer); - mem.shared_pointer = 0; - } - } - map_host_used -= mem.device_size; - } - else { - /* Free device memory. */ - cuMemFree(mem.device_pointer); - } - - stats.mem_free(mem.device_size); - mem.device_pointer = 0; - mem.device_size = 0; - - cuda_mem_map.erase(cuda_mem_map.find(&mem)); - } - } - - void mem_alloc(device_memory &mem) - { - if (mem.type == MEM_PIXELS && !background) { - pixels_alloc(mem); - } - else if (mem.type == MEM_TEXTURE) { - assert(!"mem_alloc not supported for textures."); - } - else { - generic_alloc(mem); - } - } - - void mem_copy_to(device_memory &mem) - { - if (mem.type == MEM_PIXELS) { - assert(!"mem_copy_to not supported for pixels."); - } - else if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); - } - else { - if (!mem.device_pointer) { - generic_alloc(mem); - } - - generic_copy_to(mem); - } - } - - void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) - { - if (mem.type == MEM_PIXELS && !background) { - pixels_copy_from(mem, y, w, h); - } - else if (mem.type == MEM_TEXTURE) { - assert(!"mem_copy_from not supported for textures."); - } - else { - CUDAContextScope scope(this); - size_t offset = elem * y * w; - size_t size = elem * w * h; - - if (mem.host_pointer && 
mem.device_pointer) { - cuda_assert(cuMemcpyDtoH( - (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); - } - else if (mem.host_pointer) { - memset((char *)mem.host_pointer + offset, 0, size); - } - } - } - - void mem_zero(device_memory &mem) - { - if (!mem.device_pointer) { - mem_alloc(mem); - } - - if (mem.host_pointer) { - memset(mem.host_pointer, 0, mem.memory_size()); - } - - /* If use_mapped_host of mem is false, mem.device_pointer currently - * refers to device memory regardless of mem.host_pointer and - * mem.shared_pointer. */ - - if (mem.device_pointer && - (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) { - CUDAContextScope scope(this); - cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); - } - } - - void mem_free(device_memory &mem) - { - if (mem.type == MEM_PIXELS && !background) { - pixels_free(mem); - } - else if (mem.type == MEM_TEXTURE) { - tex_free(mem); - } - else { - generic_free(mem); - } - } - - virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) - { - return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset)); - } - - void const_copy_to(const char *name, void *host, size_t size) - { - CUDAContextScope scope(this); - CUdeviceptr mem; - size_t bytes; - - cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); - // assert(bytes == size); - cuda_assert(cuMemcpyHtoD(mem, host, size)); - } - - void tex_alloc(device_memory &mem) - { - CUDAContextScope scope(this); - - /* General variables for both architectures */ - string bind_name = mem.name; - size_t dsize = datatype_size(mem.data_type); - size_t size = mem.memory_size(); - - CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; - switch (mem.extension) { - case EXTENSION_REPEAT: - address_mode = CU_TR_ADDRESS_MODE_WRAP; - break; - case EXTENSION_EXTEND: - address_mode = CU_TR_ADDRESS_MODE_CLAMP; - break; - case 
EXTENSION_CLIP: - address_mode = CU_TR_ADDRESS_MODE_BORDER; - break; - default: - assert(0); - break; - } - - CUfilter_mode filter_mode; - if (mem.interpolation == INTERPOLATION_CLOSEST) { - filter_mode = CU_TR_FILTER_MODE_POINT; - } - else { - filter_mode = CU_TR_FILTER_MODE_LINEAR; - } - - /* Data Storage */ - if (mem.interpolation == INTERPOLATION_NONE) { - generic_alloc(mem); - generic_copy_to(mem); - - CUdeviceptr cumem; - size_t cubytes; - - cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str())); - - if (cubytes == 8) { - /* 64 bit device pointer */ - uint64_t ptr = mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes)); - } - else { - /* 32 bit device pointer */ - uint32_t ptr = (uint32_t)mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes)); - } - return; - } - - /* Image Texture Storage */ - CUarray_format_enum format; - switch (mem.data_type) { - case TYPE_UCHAR: - format = CU_AD_FORMAT_UNSIGNED_INT8; - break; - case TYPE_UINT16: - format = CU_AD_FORMAT_UNSIGNED_INT16; - break; - case TYPE_UINT: - format = CU_AD_FORMAT_UNSIGNED_INT32; - break; - case TYPE_INT: - format = CU_AD_FORMAT_SIGNED_INT32; - break; - case TYPE_FLOAT: - format = CU_AD_FORMAT_FLOAT; - break; - case TYPE_HALF: - format = CU_AD_FORMAT_HALF; - break; - default: - assert(0); - return; - } - - CUDAMem *cmem = NULL; - CUarray array_3d = NULL; - size_t src_pitch = mem.data_width * dsize * mem.data_elements; - size_t dst_pitch = src_pitch; - - if (mem.data_depth > 1) { - /* 3D texture using array, there is no API for linear memory. */ - CUDA_ARRAY3D_DESCRIPTOR desc; - - desc.Width = mem.data_width; - desc.Height = mem.data_height; - desc.Depth = mem.data_depth; - desc.Format = format; - desc.NumChannels = mem.data_elements; - desc.Flags = 0; - - VLOG(1) << "Array 3D allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. 
(" - << string_human_readable_size(mem.memory_size()) << ")"; - - cuda_assert(cuArray3DCreate(&array_3d, &desc)); - - if (!array_3d) { - return; - } - - CUDA_MEMCPY3D param; - memset(&param, 0, sizeof(param)); - param.dstMemoryType = CU_MEMORYTYPE_ARRAY; - param.dstArray = array_3d; - param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = mem.host_pointer; - param.srcPitch = src_pitch; - param.WidthInBytes = param.srcPitch; - param.Height = mem.data_height; - param.Depth = mem.data_depth; - - cuda_assert(cuMemcpy3D(&param)); - - mem.device_pointer = (device_ptr)array_3d; - mem.device_size = size; - stats.mem_alloc(size); - - cmem = &cuda_mem_map[&mem]; - cmem->texobject = 0; - cmem->array = array_3d; - } - else if (mem.data_height > 0) { - /* 2D texture, using pitch aligned linear memory. */ - int alignment = 0; - cuda_assert( - cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice)); - dst_pitch = align_up(src_pitch, alignment); - size_t dst_size = dst_pitch * mem.data_height; - - cmem = generic_alloc(mem, dst_size - mem.memory_size()); - if (!cmem) { - return; - } - - CUDA_MEMCPY2D param; - memset(&param, 0, sizeof(param)); - param.dstMemoryType = CU_MEMORYTYPE_DEVICE; - param.dstDevice = mem.device_pointer; - param.dstPitch = dst_pitch; - param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = mem.host_pointer; - param.srcPitch = src_pitch; - param.WidthInBytes = param.srcPitch; - param.Height = mem.data_height; - - cuda_assert(cuMemcpy2DUnaligned(&param)); - } - else { - /* 1D texture, using linear memory. */ - cmem = generic_alloc(mem); - if (!cmem) { - return; - } - - cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); - } - - /* Kepler+, bindless textures. 
*/ - int flat_slot = 0; - if (string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - flat_slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - - CUDA_RESOURCE_DESC resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - - if (array_3d) { - resDesc.resType = CU_RESOURCE_TYPE_ARRAY; - resDesc.res.array.hArray = array_3d; - resDesc.flags = 0; - } - else if (mem.data_height > 0) { - resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; - resDesc.res.pitch2D.devPtr = mem.device_pointer; - resDesc.res.pitch2D.format = format; - resDesc.res.pitch2D.numChannels = mem.data_elements; - resDesc.res.pitch2D.height = mem.data_height; - resDesc.res.pitch2D.width = mem.data_width; - resDesc.res.pitch2D.pitchInBytes = dst_pitch; - } - else { - resDesc.resType = CU_RESOURCE_TYPE_LINEAR; - resDesc.res.linear.devPtr = mem.device_pointer; - resDesc.res.linear.format = format; - resDesc.res.linear.numChannels = mem.data_elements; - resDesc.res.linear.sizeInBytes = mem.device_size; - } - - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = address_mode; - texDesc.addressMode[1] = address_mode; - texDesc.addressMode[2] = address_mode; - texDesc.filterMode = filter_mode; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - - cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); - - /* Resize once */ - if (flat_slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount - * of re-allocations. 
*/ - texture_info.resize(flat_slot + 128); - } - - /* Set Mapping and tag that we need to (re-)upload to device */ - TextureInfo &info = texture_info[flat_slot]; - info.data = (uint64_t)cmem->texobject; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; - need_texture_info = true; - } - - void tex_free(device_memory &mem) - { - if (mem.device_pointer) { - CUDAContextScope scope(this); - const CUDAMem &cmem = cuda_mem_map[&mem]; - - if (cmem.texobject) { - /* Free bindless texture. */ - cuTexObjectDestroy(cmem.texobject); - } - - if (cmem.array) { - /* Free array. */ - cuArrayDestroy(cmem.array); - stats.mem_free(mem.device_size); - mem.device_pointer = 0; - mem.device_size = 0; - - cuda_mem_map.erase(cuda_mem_map.find(&mem)); - } - else { - generic_free(mem); - } - } - } - -#define CUDA_GET_BLOCKSIZE(func, w, h) \ - int threads_per_block; \ - cuda_assert( \ - cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ - int threads = (int)sqrt((float)threads_per_block); \ - int xblocks = ((w) + threads - 1) / threads; \ - int yblocks = ((h) + threads - 1) / threads; - -#define CUDA_LAUNCH_KERNEL(func, args) \ - cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); - -/* Similar as above, but for 1-dimensional blocks. 
*/ -#define CUDA_GET_BLOCKSIZE_1D(func, w, h) \ - int threads_per_block; \ - cuda_assert( \ - cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ - int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \ - int yblocks = h; - -#define CUDA_LAUNCH_KERNEL_1D(func, args) \ - cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0)); - - bool denoising_non_local_means(device_ptr image_ptr, - device_ptr guide_ptr, - device_ptr variance_ptr, - device_ptr out_ptr, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - int stride = task->buffer.stride; - int w = task->buffer.width; - int h = task->buffer.h; - int r = task->nlm_state.r; - int f = task->nlm_state.f; - float a = task->nlm_state.a; - float k_2 = task->nlm_state.k_2; - - int pass_stride = task->buffer.pass_stride; - int num_shifts = (2 * r + 1) * (2 * r + 1); - int channel_offset = task->nlm_state.is_color ? 
task->buffer.pass_stride : 0; - int frame_offset = 0; - - if (have_error()) - return false; - - CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer); - CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; - CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts; - CUdeviceptr scale_ptr = 0; - - cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride)); - cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride)); - - { - CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput; - cuda_assert(cuModuleGetFunction( - &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); - cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); - cuda_assert(cuModuleGetFunction( - &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight")); - cuda_assert(cuModuleGetFunction( - &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output")); - - cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1)); - - CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts); - - void *calc_difference_args[] = {&guide_ptr, - &variance_ptr, - &scale_ptr, - &difference, - &w, - &h, - &stride, - &pass_stride, - &r, - &channel_offset, - &frame_offset, - &a, - &k_2}; - void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; - void *calc_weight_args[] = { - &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; - void *update_output_args[] = {&blurDifference, - &image_ptr, - &out_ptr, - &weightAccum, - &w, - &h, - &stride, - &pass_stride, - &channel_offset, - &r, - &f}; - - 
CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); - } - - { - CUfunction cuNLMNormalize; - cuda_assert(cuModuleGetFunction( - &cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize")); - cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1)); - void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride}; - CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h); - CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args); - cuda_assert(cuCtxSynchronize()); - } - - return !have_error(); - } - - bool denoising_construct_transform(DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - CUfunction cuFilterConstructTransform; - cuda_assert(cuModuleGetFunction( - &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform")); - cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED)); - CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h); - - void *args[] = {&task->buffer.mem.device_pointer, - &task->tile_info_mem.device_pointer, - &task->storage.transform.device_pointer, - &task->storage.rank.device_pointer, - &task->filter_area, - &task->rect, - &task->radius, - &task->pca_threshold, - &task->buffer.pass_stride, - &task->buffer.frame_stride, - &task->buffer.use_time}; - CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_accumulate(device_ptr color_ptr, - device_ptr color_variance_ptr, - device_ptr scale_ptr, - int frame, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - int r = task->radius; - int f = 4; - float a = 1.0f; - float k_2 = task->nlm_k_2; - - int 
w = task->reconstruction_state.source_w; - int h = task->reconstruction_state.source_h; - int stride = task->buffer.stride; - int frame_offset = frame * task->buffer.frame_stride; - int t = task->tile_info->frames[frame]; - - int pass_stride = task->buffer.pass_stride; - int num_shifts = (2 * r + 1) * (2 * r + 1); - - if (have_error()) - return false; - - CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer); - CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; - - CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; - cuda_assert(cuModuleGetFunction( - &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); - cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); - cuda_assert(cuModuleGetFunction( - &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight")); - cuda_assert(cuModuleGetFunction( - &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian")); - - cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED)); - - CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, - task->reconstruction_state.source_w * - task->reconstruction_state.source_h, - num_shifts); - - void *calc_difference_args[] = {&color_ptr, - &color_variance_ptr, - &scale_ptr, - &difference, - &w, - &h, - &stride, - &pass_stride, - &r, - &pass_stride, - &frame_offset, - &a, - &k_2}; - void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; - void *calc_weight_args[] = { - &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; - void *construct_gramian_args[] = {&t, - &blurDifference, - 
&task->buffer.mem.device_pointer, - &task->storage.transform.device_pointer, - &task->storage.rank.device_pointer, - &task->storage.XtWX.device_pointer, - &task->storage.XtWY.device_pointer, - &task->reconstruction_state.filter_window, - &w, - &h, - &stride, - &pass_stride, - &r, - &f, - &frame_offset, - &task->buffer.use_time}; - - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_solve(device_ptr output_ptr, DenoisingTask *task) - { - CUfunction cuFinalize; - cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize")); - cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1)); - void *finalize_args[] = {&output_ptr, - &task->storage.rank.device_pointer, - &task->storage.XtWX.device_pointer, - &task->storage.XtWY.device_pointer, - &task->filter_area, - &task->reconstruction_state.buffer_params.x, - &task->render_buffer.samples}; - CUDA_GET_BLOCKSIZE( - cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h); - CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_combine_halves(device_ptr a_ptr, - device_ptr b_ptr, - device_ptr mean_ptr, - device_ptr variance_ptr, - int r, - int4 rect, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - CUfunction cuFilterCombineHalves; - cuda_assert(cuModuleGetFunction( - &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves")); - cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterCombineHalves, task->rect.z - task->rect.x, 
task->rect.w - task->rect.y); - - void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r}; - CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_divide_shadow(device_ptr a_ptr, - device_ptr b_ptr, - device_ptr sample_variance_ptr, - device_ptr sv_variance_ptr, - device_ptr buffer_variance_ptr, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - CUfunction cuFilterDivideShadow; - cuda_assert(cuModuleGetFunction( - &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow")); - cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&task->render_buffer.samples, - &task->tile_info_mem.device_pointer, - &a_ptr, - &b_ptr, - &sample_variance_ptr, - &sv_variance_ptr, - &buffer_variance_ptr, - &task->rect, - &task->render_buffer.pass_stride, - &task->render_buffer.offset}; - CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_get_feature(int mean_offset, - int variance_offset, - device_ptr mean_ptr, - device_ptr variance_ptr, - float scale, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - CUfunction cuFilterGetFeature; - cuda_assert(cuModuleGetFunction( - &cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature")); - cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&task->render_buffer.samples, - &task->tile_info_mem.device_pointer, - &mean_offset, - &variance_offset, - &mean_ptr, - &variance_ptr, - &scale, - &task->rect, - &task->render_buffer.pass_stride, - 
&task->render_buffer.offset}; - CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_write_feature(int out_offset, - device_ptr from_ptr, - device_ptr buffer_ptr, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - CUfunction cuFilterWriteFeature; - cuda_assert(cuModuleGetFunction( - &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature")); - cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w); - - void *args[] = {&task->render_buffer.samples, - &task->reconstruction_state.buffer_params, - &task->filter_area, - &from_ptr, - &buffer_ptr, - &out_offset, - &task->rect}; - CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_detect_outliers(device_ptr image_ptr, - device_ptr variance_ptr, - device_ptr depth_ptr, - device_ptr output_ptr, - DenoisingTask *task) - { - if (have_error()) - return false; - - CUDAContextScope scope(this); - - CUfunction cuFilterDetectOutliers; - cuda_assert(cuModuleGetFunction( - &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers")); - cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&image_ptr, - &variance_ptr, - &depth_ptr, - &output_ptr, - &task->rect, - &task->buffer.pass_stride}; - - CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args); - cuda_assert(cuCtxSynchronize()); - - return !have_error(); - } - - void denoise(RenderTile &rtile, DenoisingTask &denoising) - { - denoising.functions.construct_transform = function_bind( - &CUDADevice::denoising_construct_transform, this, &denoising); - denoising.functions.accumulate = 
function_bind( - &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); - denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising); - denoising.functions.divide_shadow = function_bind( - &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); - denoising.functions.non_local_means = function_bind( - &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); - denoising.functions.combine_halves = function_bind( - &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); - denoising.functions.get_feature = function_bind( - &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising); - denoising.functions.write_feature = function_bind( - &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising); - denoising.functions.detect_outliers = function_bind( - &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); - - denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); - denoising.render_buffer.samples = rtile.sample; - denoising.buffer.gpu_temporary_mem = true; - - denoising.run_denoising(&rtile); - } - - void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles) - { - scoped_timer timer(&rtile.buffers->render_time); - - if (have_error()) - return; - - CUDAContextScope scope(this); - CUfunction cuPathTrace; - - /* Get kernel function. */ - if (task.integrator_branched) { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); - } - else { - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); - } - - if (have_error()) { - return; - } - - cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); - - /* Allocate work tile. 
*/ - work_tiles.alloc(1); - - WorkTile *wtile = work_tiles.data(); - wtile->x = rtile.x; - wtile->y = rtile.y; - wtile->w = rtile.w; - wtile->h = rtile.h; - wtile->offset = rtile.offset; - wtile->stride = rtile.stride; - wtile->buffer = (float *)cuda_device_ptr(rtile.buffer); - - /* Prepare work size. More step samples render faster, but for now we - * remain conservative for GPUs connected to a display to avoid driver - * timeouts and display freezing. */ - int min_blocks, num_threads_per_block; - cuda_assert(cuOccupancyMaxPotentialBlockSize( - &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); - if (!info.display_device) { - min_blocks *= 8; - } - - uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h); - - /* Render all samples. */ - int start_sample = rtile.start_sample; - int end_sample = rtile.start_sample + rtile.num_samples; - - for (int sample = start_sample; sample < end_sample; sample += step_samples) { - /* Setup and copy work tile to device. */ - wtile->start_sample = sample; - wtile->num_samples = min(step_samples, end_sample - sample); - work_tiles.copy_to_device(); - - CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); - uint total_work_size = wtile->w * wtile->h * wtile->num_samples; - uint num_blocks = divide_up(total_work_size, num_threads_per_block); - - /* Launch kernel. */ - void *args[] = {&d_work_tiles, &total_work_size}; - - cuda_assert(cuLaunchKernel( - cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); - - cuda_assert(cuCtxSynchronize()); - - /* Update progress. 
*/ - rtile.sample = sample + wtile->num_samples; - task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples); - - if (task.get_cancel()) { - if (task.need_finish_queue == false) - break; - } - } - } - - void film_convert(DeviceTask &task, - device_ptr buffer, - device_ptr rgba_byte, - device_ptr rgba_half) - { - if (have_error()) - return; - - CUDAContextScope scope(this); - - CUfunction cuFilmConvert; - CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half); - CUdeviceptr d_buffer = cuda_device_ptr(buffer); - - /* get kernel function */ - if (rgba_half) { - cuda_assert( - cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")); - } - else { - cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")); - } - - float sample_scale = 1.0f / (task.sample + 1); - - /* pass in parameters */ - void *args[] = {&d_rgba, - &d_buffer, - &sample_scale, - &task.x, - &task.y, - &task.w, - &task.h, - &task.offset, - &task.stride}; - - /* launch kernel */ - int threads_per_block; - cuda_assert(cuFuncGetAttribute( - &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert)); - - int xthreads = (int)sqrt(threads_per_block); - int ythreads = (int)sqrt(threads_per_block); - int xblocks = (task.w + xthreads - 1) / xthreads; - int yblocks = (task.h + ythreads - 1) / ythreads; - - cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)); - - cuda_assert(cuLaunchKernel(cuFilmConvert, - xblocks, - yblocks, - 1, /* blocks */ - xthreads, - ythreads, - 1, /* threads */ - 0, - 0, - args, - 0)); - - unmap_pixels((rgba_byte) ? 
rgba_byte : rgba_half); - - cuda_assert(cuCtxSynchronize()); - } - - void shader(DeviceTask &task) - { - if (have_error()) - return; - - CUDAContextScope scope(this); - - CUfunction cuShader; - CUdeviceptr d_input = cuda_device_ptr(task.shader_input); - CUdeviceptr d_output = cuda_device_ptr(task.shader_output); - - /* get kernel function */ - if (task.shader_eval_type >= SHADER_EVAL_BAKE) { - cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake")); - } - else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { - cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace")); - } - else { - cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background")); - } - - /* do tasks in smaller chunks, so we can cancel it */ - const int shader_chunk_size = 65536; - const int start = task.shader_x; - const int end = task.shader_x + task.shader_w; - int offset = task.offset; - - bool canceled = false; - for (int sample = 0; sample < task.num_samples && !canceled; sample++) { - for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { - int shader_w = min(shader_chunk_size, end - shader_x); - - /* pass in parameters */ - void *args[8]; - int arg = 0; - args[arg++] = &d_input; - args[arg++] = &d_output; - args[arg++] = &task.shader_eval_type; - if (task.shader_eval_type >= SHADER_EVAL_BAKE) { - args[arg++] = &task.shader_filter; - } - args[arg++] = &shader_x; - args[arg++] = &shader_w; - args[arg++] = &offset; - args[arg++] = &sample; - - /* launch kernel */ - int threads_per_block; - cuda_assert(cuFuncGetAttribute( - &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); - - int xblocks = (shader_w + threads_per_block - 1) / threads_per_block; - - cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); - cuda_assert(cuLaunchKernel(cuShader, - xblocks, - 1, - 1, /* blocks */ - threads_per_block, - 1, - 1, /* threads */ - 0, - 0, - args, - 0)); - - cuda_assert(cuCtxSynchronize()); 
- - if (task.get_cancel()) { - canceled = true; - break; - } - } - - task.update_progress(NULL); - } - } - - CUdeviceptr map_pixels(device_ptr mem) - { - if (!background) { - PixelMem pmem = pixel_mem_map[mem]; - CUdeviceptr buffer; - - size_t bytes; - cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)); - cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)); - - return buffer; - } - - return cuda_device_ptr(mem); - } - - void unmap_pixels(device_ptr mem) - { - if (!background) { - PixelMem pmem = pixel_mem_map[mem]; - - cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)); - } - } - - void pixels_alloc(device_memory &mem) - { - PixelMem pmem; - - pmem.w = mem.data_width; - pmem.h = mem.data_height; - - CUDAContextScope scope(this); - - glGenBuffers(1, &pmem.cuPBO); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - if (mem.data_type == TYPE_HALF) - glBufferData( - GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW); - else - glBufferData( - GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - glActiveTexture(GL_TEXTURE0); - glGenTextures(1, &pmem.cuTexId); - glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); - if (mem.data_type == TYPE_HALF) - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); - else - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glBindTexture(GL_TEXTURE_2D, 0); - - CUresult result = cuGraphicsGLRegisterBuffer( - &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); - - if (result == CUDA_SUCCESS) { - mem.device_pointer = pmem.cuTexId; - pixel_mem_map[mem.device_pointer] = pmem; - - mem.device_size = mem.memory_size(); - 
stats.mem_alloc(mem.device_size); - - return; - } - else { - /* failed to register buffer, fallback to no interop */ - glDeleteBuffers(1, &pmem.cuPBO); - glDeleteTextures(1, &pmem.cuTexId); - - background = true; - } - } - - void pixels_copy_from(device_memory &mem, int y, int w, int h) - { - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - - CUDAContextScope scope(this); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); - size_t offset = sizeof(uchar) * 4 * y * w; - memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h); - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } - - void pixels_free(device_memory &mem) - { - if (mem.device_pointer) { - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - - CUDAContextScope scope(this); - - cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); - glDeleteBuffers(1, &pmem.cuPBO); - glDeleteTextures(1, &pmem.cuTexId); - - pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); - mem.device_pointer = 0; - - stats.mem_free(mem.device_size); - mem.device_size = 0; - } - } - - void draw_pixels(device_memory &mem, - int y, - int w, - int h, - int width, - int height, - int dx, - int dy, - int dw, - int dh, - bool transparent, - const DeviceDrawParams &draw_params) - { - assert(mem.type == MEM_PIXELS); - - if (!background) { - const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL); - PixelMem pmem = pixel_mem_map[mem.device_pointer]; - float *vpointer; - - CUDAContextScope scope(this); - - /* for multi devices, this assumes the inefficient method that we allocate - * all pixels on the device even though we only render to a subset */ - size_t offset = 4 * y * w; - - if (mem.data_type == TYPE_HALF) - offset *= sizeof(GLhalf); - else - offset *= sizeof(uint8_t); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); - 
glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); - if (mem.data_type == TYPE_HALF) { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset); - } - else { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset); - } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - if (transparent) { - glEnable(GL_BLEND); - glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - } - - GLint shader_program; - if (use_fallback_shader) { - if (!bind_fallback_display_space_shader(dw, dh)) { - return; - } - shader_program = fallback_shader_program; - } - else { - draw_params.bind_display_space_shader_cb(); - glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program); - } - - if (!vertex_buffer) { - glGenBuffers(1, &vertex_buffer); - } - - glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); - /* invalidate old contents - - * avoids stalling if buffer is still waiting in queue to be rendered */ - glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); - - vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); - - if (vpointer) { - /* texture coordinate - vertex pair */ - vpointer[0] = 0.0f; - vpointer[1] = 0.0f; - vpointer[2] = dx; - vpointer[3] = dy; - - vpointer[4] = (float)w / (float)pmem.w; - vpointer[5] = 0.0f; - vpointer[6] = (float)width + dx; - vpointer[7] = dy; - - vpointer[8] = (float)w / (float)pmem.w; - vpointer[9] = (float)h / (float)pmem.h; - vpointer[10] = (float)width + dx; - vpointer[11] = (float)height + dy; - - vpointer[12] = 0.0f; - vpointer[13] = (float)h / (float)pmem.h; - vpointer[14] = dx; - vpointer[15] = (float)height + dy; - - glUnmapBuffer(GL_ARRAY_BUFFER); - } - - GLuint vertex_array_object; - GLuint position_attribute, texcoord_attribute; - - glGenVertexArrays(1, &vertex_array_object); - glBindVertexArray(vertex_array_object); - - texcoord_attribute = glGetAttribLocation(shader_program, "texCoord"); - position_attribute = glGetAttribLocation(shader_program, "pos"); 
- - glEnableVertexAttribArray(texcoord_attribute); - glEnableVertexAttribArray(position_attribute); - - glVertexAttribPointer( - texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); - glVertexAttribPointer(position_attribute, - 2, - GL_FLOAT, - GL_FALSE, - 4 * sizeof(float), - (const GLvoid *)(sizeof(float) * 2)); - - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - - if (use_fallback_shader) { - glUseProgram(0); - } - else { - draw_params.unbind_display_space_shader_cb(); - } - - if (transparent) { - glDisable(GL_BLEND); - } - - glBindTexture(GL_TEXTURE_2D, 0); - - return; - } - - Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params); - } - - void thread_run(DeviceTask *task) - { - CUDAContextScope scope(this); - - if (task->type == DeviceTask::RENDER) { - DeviceRequestedFeatures requested_features; - if (use_split_kernel()) { - if (split_kernel == NULL) { - split_kernel = new CUDASplitKernel(this); - split_kernel->load_kernels(requested_features); - } - } - - device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY); - - /* keep rendering tiles until done */ - RenderTile tile; - DenoisingTask denoising(this, *task); - - while (task->acquire_tile(this, tile)) { - if (tile.task == RenderTile::PATH_TRACE) { - if (use_split_kernel()) { - device_only_memory<uchar> void_buffer(this, "void_buffer"); - split_kernel->path_trace(task, tile, void_buffer, void_buffer); - } - else { - path_trace(*task, tile, work_tiles); - } - } - else if (tile.task == RenderTile::DENOISE) { - tile.sample = tile.start_sample + tile.num_samples; - - denoise(tile, denoising); - - task->update_progress(&tile, tile.w * tile.h); - } - - task->release_tile(tile); - - if (task->get_cancel()) { - if (task->need_finish_queue == false) - break; - } - } - - work_tiles.free(); - } - else if (task->type == DeviceTask::SHADER) { - shader(*task); - - cuda_assert(cuCtxSynchronize()); - } - } - - class CUDADeviceTask : public DeviceTask { - 
public: - CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task) - { - run = function_bind(&CUDADevice::thread_run, device, this); - } - }; - - void task_add(DeviceTask &task) - { - CUDAContextScope scope(this); - - /* Load texture info. */ - load_texture_info(); - - /* Synchronize all memory copies before executing task. */ - cuda_assert(cuCtxSynchronize()); - - if (task.type == DeviceTask::FILM_CONVERT) { - /* must be done in main thread due to opengl access */ - film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); - } - else { - task_pool.push(new CUDADeviceTask(this, task)); - } - } - - void task_wait() - { - task_pool.wait(); - } - - void task_cancel() - { - task_pool.cancel(); - } - - friend class CUDASplitKernelFunction; - friend class CUDASplitKernel; - friend class CUDAContextScope; -}; - -/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class - * now that the definition of that class is complete - */ -#undef cuda_assert -#define cuda_assert(stmt) \ - { \ - CUresult result = stmt; \ -\ - if (result != CUDA_SUCCESS) { \ - string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \ - if (device->error_msg == "") \ - device->error_msg = message; \ - fprintf(stderr, "%s\n", message.c_str()); \ - /*cuda_abort();*/ \ - device->cuda_error_documentation(); \ - } \ - } \ - (void)0 - -/* CUDA context scope. 
*/ - -CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device) -{ - cuda_assert(cuCtxPushCurrent(device->cuContext)); -} - -CUDAContextScope::~CUDAContextScope() -{ - cuda_assert(cuCtxPopCurrent(NULL)); -} - -/* split kernel */ - -class CUDASplitKernelFunction : public SplitKernelFunction { - CUDADevice *device; - CUfunction func; - - public: - CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) - { - } - - /* enqueue the kernel, returns false if there is an error */ - bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/) - { - return enqueue(dim, NULL); - } - - /* enqueue the kernel, returns false if there is an error */ - bool enqueue(const KernelDimensions &dim, void *args[]) - { - if (device->have_error()) - return false; - - CUDAContextScope scope(device); - - /* we ignore dim.local_size for now, as this is faster */ - int threads_per_block; - cuda_assert( - cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); - - int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) / - threads_per_block; - - cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); - - cuda_assert(cuLaunchKernel(func, - xblocks, - 1, - 1, /* blocks */ - threads_per_block, - 1, - 1, /* threads */ - 0, - 0, - args, - 0)); - - return !device->have_error(); - } -}; - -CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device) -{ -} - -uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/, - device_memory & /*data*/, - size_t num_threads) -{ - CUDAContextScope scope(device); - - device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); - size_buffer.alloc(1); - size_buffer.zero_to_device(); - - uint threads = num_threads; - CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); - - struct args_t { - uint *num_threads; - CUdeviceptr *size; - }; - - 
args_t args = {&threads, &d_size}; - - CUfunction state_buffer_size; - cuda_assert( - cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size")); - - cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0)); - - size_buffer.copy_from_device(0, 1, 1); - size_t size = size_buffer[0]; - size_buffer.free(); - - return size; -} - -bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim, - RenderTile &rtile, - int num_global_elements, - device_memory & /*kernel_globals*/, - device_memory & /*kernel_data*/, - device_memory &split_data, - device_memory &ray_state, - device_memory &queue_index, - device_memory &use_queues_flag, - device_memory &work_pool_wgs) -{ - CUDAContextScope scope(device); - - CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer); - CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer); - CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer); - CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer); - CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer); - - CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer); - - int end_sample = rtile.start_sample + rtile.num_samples; - int queue_size = dim.global_size[0] * dim.global_size[1]; - - struct args_t { - CUdeviceptr *split_data_buffer; - int *num_elements; - CUdeviceptr *ray_state; - int *start_sample; - int *end_sample; - int *sx; - int *sy; - int *sw; - int *sh; - int *offset; - int *stride; - CUdeviceptr *queue_index; - int *queuesize; - CUdeviceptr *use_queues_flag; - CUdeviceptr *work_pool_wgs; - int *num_samples; - CUdeviceptr *buffer; - }; - - args_t args = {&d_split_data, - &num_global_elements, - &d_ray_state, - &rtile.start_sample, - &end_sample, - &rtile.x, - &rtile.y, - &rtile.w, - &rtile.h, - &rtile.offset, - &rtile.stride, - &d_queue_index, - &queue_size, - 
&d_use_queues_flag, - &d_work_pool_wgs, - &rtile.num_samples, - &d_buffer}; - - CUfunction data_init; - cuda_assert( - cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init")); - if (device->have_error()) { - return false; - } - - CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args); - - return !device->have_error(); -} - -SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name, - const DeviceRequestedFeatures &) -{ - CUDAContextScope scope(device); - CUfunction func; - - cuda_assert( - cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data())); - if (device->have_error()) { - device->cuda_error_message( - string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data())); - return NULL; - } - - return new CUDASplitKernelFunction(device, func); -} - -int2 CUDASplitKernel::split_kernel_local_size() -{ - return make_int2(32, 1); -} - -int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg, - device_memory &data, - DeviceTask * /*task*/) -{ - CUDAContextScope scope(device); - size_t free; - size_t total; - - cuda_assert(cuMemGetInfo(&free, &total)); - - VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free) - << " bytes. 
(" << string_human_readable_size(free) << ")."; - - size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2); - size_t side = round_down((int)sqrt(num_elements), 32); - int2 global_size = make_int2(side, round_down(num_elements / side, 16)); - VLOG(1) << "Global size: " << global_size << "."; - return global_size; -} - bool device_cuda_init() { -#ifdef WITH_CUDA_DYNLOAD +# ifdef WITH_CUDA_DYNLOAD static bool initialized = false; static bool result = false; @@ -2584,7 +43,6 @@ bool device_cuda_init() VLOG(1) << "Found precompiled kernels"; result = true; } -# ifndef _WIN32 else if (cuewCompilerPath() != NULL) { VLOG(1) << "Found CUDA compiler " << cuewCompilerPath(); result = true; @@ -2593,7 +51,6 @@ bool device_cuda_init() VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found," << " unable to use CUDA"; } -# endif } else { VLOG(1) << "CUEW initialization failed: " @@ -2602,9 +59,9 @@ bool device_cuda_init() } return result; -#else /* WITH_CUDA_DYNLOAD */ +# else /* WITH_CUDA_DYNLOAD */ return true; -#endif /* WITH_CUDA_DYNLOAD */ +# endif /* WITH_CUDA_DYNLOAD */ } Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) @@ -2614,7 +71,7 @@ Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, b static CUresult device_cuda_safe_init() { -#ifdef _WIN32 +# ifdef _WIN32 __try { return cuInit(0); } @@ -2625,9 +82,9 @@ static CUresult device_cuda_safe_init() } return CUDA_ERROR_NO_DEVICE; -#else +# else return cuInit(0); -#endif +# endif } void device_cuda_info(vector<DeviceInfo> &devices) @@ -2739,13 +196,13 @@ string device_cuda_capabilities() } capabilities += string("\t") + name + "\n"; int value; -#define GET_ATTR(attr) \ - { \ - if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \ - capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \ +# define GET_ATTR(attr) \ + { \ + if (cuDeviceGetAttribute(&value, 
CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \ + capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \ + } \ } \ - } \ - (void)0 + (void)0 /* TODO(sergey): Strip all attributes which are not useful for us * or does not depend on the driver. */ @@ -2836,7 +293,7 @@ string device_cuda_capabilities() GET_ATTR(MANAGED_MEMORY); GET_ATTR(MULTI_GPU_BOARD); GET_ATTR(MULTI_GPU_BOARD_GROUP_ID); -#undef GET_ATTR +# undef GET_ATTR capabilities += "\n"; } @@ -2844,3 +301,5 @@ string device_cuda_capabilities() } CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/device/device_intern.h b/intern/cycles/device/device_intern.h index 5b8b86886c4..0c229ac24cf 100644 --- a/intern/cycles/device/device_intern.h +++ b/intern/cycles/device/device_intern.h @@ -17,9 +17,15 @@ #ifndef __DEVICE_INTERN_H__ #define __DEVICE_INTERN_H__ +#include "util/util_string.h" +#include "util/util_vector.h" + CCL_NAMESPACE_BEGIN class Device; +class DeviceInfo; +class Profiler; +class Stats; Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background); bool device_opencl_init(); diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 3a99a49dffc..671cd7c29f3 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/device.h" #include "device/device_memory.h" +#include "device/device.h" CCL_NAMESPACE_BEGIN @@ -31,8 +31,6 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type) data_depth(0), type(type), name(name), - interpolation(INTERPOLATION_NONE), - extension(EXTENSION_REPEAT), device(device), device_pointer(0), host_pointer(0), @@ -76,7 +74,7 @@ void device_memory::host_free() void device_memory::device_alloc() { - assert(!device_pointer && type != MEM_TEXTURE); + assert(!device_pointer && type != MEM_TEXTURE && type != MEM_GLOBAL); device->mem_alloc(*this); } @@ -96,7 +94,7 @@ void device_memory::device_copy_to() void device_memory::device_copy_from(int y, int w, int h, int elem) { - assert(type != MEM_TEXTURE && type != MEM_READ_ONLY); + assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL); device->mem_copy_from(*this, y, w, h, elem); } @@ -139,4 +137,93 @@ device_sub_ptr::~device_sub_ptr() device->mem_free_sub_ptr(ptr); } +/* Device Texture */ + +device_texture::device_texture(Device *device, + const char *name, + const uint slot, + ImageDataType image_data_type, + InterpolationType interpolation, + ExtensionType extension) + : device_memory(device, name, MEM_TEXTURE), slot(slot) +{ + switch (image_data_type) { + case IMAGE_DATA_TYPE_FLOAT4: + data_type = TYPE_FLOAT; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_FLOAT: + data_type = TYPE_FLOAT; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_BYTE4: + data_type = TYPE_UCHAR; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_BYTE: + data_type = TYPE_UCHAR; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_HALF4: + data_type = TYPE_HALF; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_HALF: + data_type = TYPE_HALF; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_USHORT4: + data_type = TYPE_UINT16; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_USHORT: + data_type = TYPE_UINT16; + data_elements = 1; + break; + case 
IMAGE_DATA_NUM_TYPES: + assert(0); + return; + } + + memset(&info, 0, sizeof(info)); + info.data_type = image_data_type; + info.interpolation = interpolation; + info.extension = extension; +} + +device_texture::~device_texture() +{ + device_free(); + host_free(); +} + +/* Host memory allocation. */ +void *device_texture::alloc(const size_t width, const size_t height, const size_t depth) +{ + const size_t new_size = size(width, height, depth); + + if (new_size != data_size) { + device_free(); + host_free(); + host_pointer = host_alloc(data_elements * datatype_size(data_type) * new_size); + assert(device_pointer == 0); + } + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + + info.width = width; + info.height = height; + info.depth = depth; + + return host_pointer; +} + +void device_texture::copy_to_device() +{ + device_copy_to(); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 60740807568..1c20db900bc 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -23,6 +23,7 @@ #include "util/util_array.h" #include "util/util_half.h" +#include "util/util_string.h" #include "util/util_texture.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -31,7 +32,14 @@ CCL_NAMESPACE_BEGIN class Device; -enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS }; +enum MemoryType { + MEM_READ_ONLY, + MEM_READ_WRITE, + MEM_DEVICE_ONLY, + MEM_GLOBAL, + MEM_TEXTURE, + MEM_PIXELS +}; /* Supported Data Types */ @@ -208,8 +216,6 @@ class device_memory { size_t data_depth; MemoryType type; const char *name; - InterpolationType interpolation; - ExtensionType extension; /* Pointers. */ Device *device; @@ -310,7 +316,7 @@ template<typename T> class device_only_memory : public device_memory { * in and copied to the device with copy_to_device(). 
Or alternatively * allocated and set to zero on the device with zero_to_device(). * - * When using memory type MEM_TEXTURE, a pointer to this memory will be + * When using memory type MEM_GLOBAL, a pointer to this memory will be * automatically attached to kernel globals, using the provided name * matching an entry in kernel_textures.h. */ @@ -427,6 +433,11 @@ template<typename T> class device_vector : public device_memory { device_copy_to(); } + void copy_from_device() + { + device_copy_from(0, data_width, data_height, sizeof(T)); + } + void copy_from_device(int y, int w, int h) { device_copy_from(y, w, h, sizeof(T)); @@ -498,6 +509,33 @@ class device_sub_ptr { device_ptr ptr; }; +/* Device Texture + * + * 2D or 3D image texture memory. */ + +class device_texture : public device_memory { + public: + device_texture(Device *device, + const char *name, + const uint slot, + ImageDataType image_data_type, + InterpolationType interpolation, + ExtensionType extension); + ~device_texture(); + + void *alloc(const size_t width, const size_t height, const size_t depth = 0); + void copy_to_device(); + + uint slot; + TextureInfo info; + + protected: + size_t size(const size_t width, const size_t height, const size_t depth) + { + return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth); + } +}; + CCL_NAMESPACE_END #endif /* __DEVICE_MEMORY_H__ */ diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index b8587eb0a62..3636ecaa7a1 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include <stdlib.h> #include <sstream> +#include <stdlib.h> #include "device/device.h" #include "device/device_intern.h" @@ -42,7 +42,7 @@ class MultiDevice : public Device { map<device_ptr, device_ptr> ptr_map; }; - list<SubDevice> devices; + list<SubDevice> devices, denoising_devices; device_ptr unique_key; MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) @@ -61,6 +61,12 @@ class MultiDevice : public Device { } } + foreach (DeviceInfo &subinfo, info.denoising_devices) { + Device *device = Device::create(subinfo, sub_stats_, profiler, background); + + denoising_devices.push_back(SubDevice(device)); + } + #ifdef WITH_NETWORK /* try to add network devices */ ServerDiscovery discovery(true); @@ -80,17 +86,18 @@ class MultiDevice : public Device { { foreach (SubDevice &sub, devices) delete sub.device; + foreach (SubDevice &sub, denoising_devices) + delete sub.device; } const string &error_message() { - foreach (SubDevice &sub, devices) { - if (sub.device->error_message() != "") { - if (error_msg == "") - error_msg = sub.device->error_message(); - break; - } - } + error_msg.clear(); + + foreach (SubDevice &sub, devices) + error_msg += sub.device->error_message(); + foreach (SubDevice &sub, denoising_devices) + error_msg += sub.device->error_message(); return error_msg; } @@ -118,6 +125,12 @@ class MultiDevice : public Device { if (!sub.device->load_kernels(requested_features)) return false; + if (requested_features.use_denoising) { + foreach (SubDevice &sub, denoising_devices) + if (!sub.device->load_kernels(requested_features)) + return false; + } + return true; } @@ -127,6 +140,12 @@ class MultiDevice : public Device { if (!sub.device->wait_for_availability(requested_features)) return false; + if (requested_features.use_denoising) { + foreach (SubDevice &sub, denoising_devices) + if (!sub.device->wait_for_availability(requested_features)) + return false; + } + return true; } @@ -150,19 +169,28 @@ class MultiDevice : public Device 
{ break; } } + return result; } bool build_optix_bvh(BVH *bvh) { - // Broadcast acceleration structure build to all devices - foreach (SubDevice &sub, devices) { + // Broadcast acceleration structure build to all render devices + foreach (SubDevice &sub, devices) if (!sub.device->build_optix_bvh(bvh)) return false; - } + return true; } + virtual void *osl_memory() + { + if (devices.size() > 1) { + return NULL; + } + return devices.front().device->osl_memory(); + } + void mem_alloc(device_memory &mem) { device_ptr key = unique_key++; @@ -236,6 +264,17 @@ class MultiDevice : public Device { sub.ptr_map[key] = mem.device_pointer; } + if (strcmp(mem.name, "RenderBuffers") == 0) { + foreach (SubDevice &sub, denoising_devices) { + mem.device = sub.device; + mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0; + mem.device_size = existing_size; + + sub.device->mem_zero(mem); + sub.ptr_map[key] = mem.device_pointer; + } + } + mem.device = this; mem.device_pointer = key; stats.mem_alloc(mem.device_size - existing_size); @@ -255,6 +294,17 @@ class MultiDevice : public Device { sub.ptr_map.erase(sub.ptr_map.find(key)); } + if (strcmp(mem.name, "RenderBuffers") == 0) { + foreach (SubDevice &sub, denoising_devices) { + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + mem.device_size = existing_size; + + sub.device->mem_free(mem); + sub.ptr_map.erase(sub.ptr_map.find(key)); + } + } + mem.device = this; mem.device_pointer = 0; mem.device_size = 0; @@ -302,10 +352,21 @@ class MultiDevice : public Device { void map_tile(Device *sub_device, RenderTile &tile) { + if (!tile.buffer) { + return; + } + foreach (SubDevice &sub, devices) { if (sub.device == sub_device) { - if (tile.buffer) - tile.buffer = sub.ptr_map[tile.buffer]; + tile.buffer = sub.ptr_map[tile.buffer]; + return; + } + } + + foreach (SubDevice &sub, denoising_devices) { + if (sub.device == sub_device) { + tile.buffer = sub.ptr_map[tile.buffer]; + return; } } } @@ -320,6 +381,12 @@ class 
MultiDevice : public Device { i++; } + foreach (SubDevice &sub, denoising_devices) { + if (sub.device == sub_device) + return i; + i++; + } + return -1; } @@ -330,24 +397,41 @@ class MultiDevice : public Device { continue; } + device_vector<float> &mem = tiles[i].buffers->buffer; + tiles[i].buffer = mem.device_pointer; + + if (mem.device == this && denoising_devices.empty()) { + /* Skip unnecessary copies in viewport mode (buffer covers the + * whole image), but still need to fix up the tile device pointer. */ + map_tile(sub_device, tiles[i]); + continue; + } + /* If the tile was rendered on another device, copy its memory to * to the current device now, for the duration of the denoising task. * Note that this temporarily modifies the RenderBuffers and calls * the device, so this function is not thread safe. */ - device_vector<float> &mem = tiles[i].buffers->buffer; if (mem.device != sub_device) { /* Only copy from device to host once. This is faster, but * also required for the case where a CPU thread is denoising * a tile rendered on the GPU. In that case we have to avoid - * overwriting the buffer being denoised by the CPU thread. */ + * overwriting the buffer being de-noised by the CPU thread. */ if (!tiles[i].buffers->map_neighbor_copied) { tiles[i].buffers->map_neighbor_copied = true; - mem.copy_from_device(0, mem.data_size, 1); + mem.copy_from_device(); } - mem.swap_device(sub_device, 0, 0); + if (mem.device == this) { + /* Can re-use memory if tile is already allocated on the sub device. */ + map_tile(sub_device, tiles[i]); + mem.swap_device(sub_device, mem.device_size, tiles[i].buffer); + } + else { + mem.swap_device(sub_device, 0, 0); + } mem.copy_to_device(); + tiles[i].buffer = mem.device_pointer; tiles[i].device_size = mem.device_size; @@ -358,11 +442,17 @@ class MultiDevice : public Device { void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles) { - /* Copy denoised result back to the host. 
*/ device_vector<float> &mem = tiles[9].buffers->buffer; + + if (mem.device == this && denoising_devices.empty()) { + return; + } + + /* Copy denoised result back to the host. */ mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer); - mem.copy_from_device(0, mem.data_size, 1); + mem.copy_from_device(); mem.restore_device(); + /* Copy denoised result to the original device. */ mem.copy_to_device(); @@ -372,7 +462,9 @@ class MultiDevice : public Device { } device_vector<float> &mem = tiles[i].buffers->buffer; - if (mem.device != sub_device) { + + if (mem.device != sub_device && mem.device != this) { + /* Free up memory again if it was allocated for the copy above. */ mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer); sub_device->mem_free(mem); mem.restore_device(); @@ -398,10 +490,29 @@ class MultiDevice : public Device { void task_add(DeviceTask &task) { + list<SubDevice> task_devices = devices; + if (!denoising_devices.empty()) { + if (task.type == DeviceTask::DENOISE_BUFFER) { + /* Denoising tasks should be redirected to the denoising devices entirely. */ + task_devices = denoising_devices; + } + else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) { + const uint tile_types = task.tile_types; + /* For normal rendering tasks only redirect the denoising part to the denoising devices. + * Do not need to split the task here, since they all run through 'acquire_tile'. */ + task.tile_types = RenderTile::DENOISE; + foreach (SubDevice &sub, denoising_devices) { + sub.device->task_add(task); + } + /* Rendering itself should still be executed on the rendering devices. 
*/ + task.tile_types = tile_types ^ RenderTile::DENOISE; + } + } + list<DeviceTask> tasks; - task.split(tasks, devices.size()); + task.split(tasks, task_devices.size()); - foreach (SubDevice &sub, devices) { + foreach (SubDevice &sub, task_devices) { if (!tasks.empty()) { DeviceTask subtask = tasks.front(); tasks.pop_front(); @@ -426,12 +537,16 @@ class MultiDevice : public Device { { foreach (SubDevice &sub, devices) sub.device->task_wait(); + foreach (SubDevice &sub, denoising_devices) + sub.device->task_wait(); } void task_cancel() { foreach (SubDevice &sub, devices) sub.device->task_cancel(); + foreach (SubDevice &sub, denoising_devices) + sub.device->task_cancel(); } protected: diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index 80334ad8f22..2742cbf53aa 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "device/device_network.h" #include "device/device.h" #include "device/device_intern.h" -#include "device/device_network.h" #include "util/util_foreach.h" #include "util/util_logging.h" diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index 5b69b815cc6..e74c4508ab6 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -19,19 +19,19 @@ #ifdef WITH_NETWORK -# include <boost/archive/text_iarchive.hpp> -# include <boost/archive/text_oarchive.hpp> # include <boost/archive/binary_iarchive.hpp> # include <boost/archive/binary_oarchive.hpp> +# include <boost/archive/text_iarchive.hpp> +# include <boost/archive/text_oarchive.hpp> # include <boost/array.hpp> # include <boost/asio.hpp> # include <boost/bind.hpp> # include <boost/serialization/vector.hpp> # include <boost/thread.hpp> +# include <deque> # include <iostream> # include <sstream> -# include <deque> # include "render/buffers.h" diff --git 
a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index b07596c60ff..891b73351a0 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -16,8 +16,8 @@ #ifdef WITH_OPENCL -# include "device/opencl/opencl.h" - +# include "device/opencl/device_opencl.h" +# include "device/device.h" # include "device/device_intern.h" # include "util/util_foreach.h" diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index c1106b367ca..42d7b00314c 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -17,30 +17,28 @@ #ifdef WITH_OPTIX -# include "device/device.h" -# include "device/device_intern.h" -# include "device/device_denoising.h" # include "bvh/bvh.h" -# include "render/scene.h" +# include "device/cuda/device_cuda.h" +# include "device/device_denoising.h" +# include "device/device_intern.h" +# include "render/buffers.h" +# include "render/hair.h" # include "render/mesh.h" # include "render/object.h" -# include "render/buffers.h" +# include "render/scene.h" +# include "util/util_debug.h" +# include "util/util_logging.h" # include "util/util_md5.h" # include "util/util_path.h" # include "util/util_time.h" -# include "util/util_debug.h" -# include "util/util_logging.h" - -# undef _WIN32_WINNT // Need minimum API support for Windows 7 -# define _WIN32_WINNT _WIN32_WINNT_WIN7 # ifdef WITH_CUDA_DYNLOAD # include <cuew.h> // Do not use CUDA SDK headers when using CUEW # define OPTIX_DONT_INCLUDE_CUDA # endif -# include <optix_stubs.h> # include <optix_function_table_definition.h> +# include <optix_stubs.h> // TODO(pmours): Disable this once drivers have native support # define OPTIX_DENOISER_NO_PIXEL_STRIDE 1 @@ -110,31 +108,23 @@ struct KernelParams { } \ (void)0 -# define CUDA_GET_BLOCKSIZE(func, w, h) \ - int threads; \ - check_result_cuda_ret( \ - cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ 
- threads = (int)sqrt((float)threads); \ - int xblocks = ((w) + threads - 1) / threads; \ - int yblocks = ((h) + threads - 1) / threads; - -# define CUDA_LAUNCH_KERNEL(func, args) \ - check_result_cuda_ret(cuLaunchKernel( \ - func, xblocks, yblocks, 1, threads, threads, 1, 0, cuda_stream[thread_index], args, 0)); - -/* Similar as above, but for 1-dimensional blocks. */ -# define CUDA_GET_BLOCKSIZE_1D(func, w, h) \ - int threads; \ - check_result_cuda_ret( \ - cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ - int xblocks = ((w) + threads - 1) / threads; \ - int yblocks = h; - -# define CUDA_LAUNCH_KERNEL_1D(func, args) \ - check_result_cuda_ret(cuLaunchKernel( \ - func, xblocks, yblocks, 1, threads, 1, 1, 0, cuda_stream[thread_index], args, 0)); +# define launch_filter_kernel(func_name, w, h, args) \ + { \ + CUfunction func; \ + check_result_cuda_ret(cuModuleGetFunction(&func, cuFilterModule, func_name)); \ + check_result_cuda_ret(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); \ + int threads; \ + check_result_cuda_ret( \ + cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ + threads = (int)sqrt((float)threads); \ + int xblocks = ((w) + threads - 1) / threads; \ + int yblocks = ((h) + threads - 1) / threads; \ + check_result_cuda_ret( \ + cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); \ + } \ + (void)0 -class OptiXDevice : public Device { +class OptiXDevice : public CUDADevice { // List of OptiX program groups enum { @@ -183,77 +173,37 @@ class OptiXDevice : public Device { // Use a pool with multiple threads to support launches with multiple CUDA streams TaskPool task_pool; - // CUDA/OptiX context handles - CUdevice cuda_device = 0; - CUcontext cuda_context = NULL; vector<CUstream> cuda_stream; OptixDeviceContext context = NULL; - // Need CUDA kernel module for some utility functions - CUmodule cuda_module = NULL; - CUmodule cuda_filter_module = NULL; - // All 
necessary OptiX kernels are in one module - OptixModule optix_module = NULL; + OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module OptixPipeline pipelines[NUM_PIPELINES] = {}; bool motion_blur = false; - bool need_texture_info = false; device_vector<SbtRecord> sbt_data; - device_vector<TextureInfo> texture_info; device_only_memory<KernelParams> launch_params; vector<CUdeviceptr> as_mem; OptixTraversableHandle tlas_handle = 0; - // TODO(pmours): This is copied from device_cuda.cpp, so move to common code eventually - int can_map_host = 0; - size_t map_host_used = 0; - size_t map_host_limit = 0; - size_t device_working_headroom = 32 * 1024 * 1024LL; // 32MB - size_t device_texture_headroom = 128 * 1024 * 1024LL; // 128MB - map<device_memory *, CUDAMem> cuda_mem_map; - bool move_texture_to_host = false; - OptixDenoiser denoiser = NULL; - vector<pair<int2, CUdeviceptr>> denoiser_state; + device_only_memory<unsigned char> denoiser_state; + int denoiser_input_passes = 0; public: OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) - : Device(info_, stats_, profiler_, background_), + : CUDADevice(info_, stats_, profiler_, background_), sbt_data(this, "__sbt", MEM_READ_ONLY), - texture_info(this, "__texture_info", MEM_TEXTURE), - launch_params(this, "__params") + launch_params(this, "__params"), + denoiser_state(this, "__denoiser_state") { // Store number of CUDA streams in device info info.cpu_threads = DebugFlags().optix.cuda_streams; - // Initialize CUDA driver API - check_result_cuda(cuInit(0)); - - // Retrieve the primary CUDA context for this device - check_result_cuda(cuDeviceGet(&cuda_device, info.num)); - check_result_cuda(cuDevicePrimaryCtxRetain(&cuda_context, cuda_device)); - - // Make that CUDA context current - const CUDAContextScope scope(cuda_context); - - // Limit amount of host mapped memory (see init_host_memory in device_cuda.cpp) - size_t default_limit = 4 * 1024 * 1024 * 1024LL; - size_t 
system_ram = system_physical_ram(); - if (system_ram > 0) { - if (system_ram / 2 > default_limit) { - map_host_limit = system_ram - default_limit; - } - else { - map_host_limit = system_ram / 2; - } - } - else { - VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; + // Make the CUDA context current + if (!cuContext) { + return; // Do not initialize if CUDA context creation failed already } - - // Check device support for pinned host memory - check_result_cuda( - cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuda_device)); + const CUDAContextScope scope(cuContext); // Create OptiX context for this device OptixDeviceContextOptions options = {}; @@ -277,7 +227,7 @@ class OptiXDevice : public Device { } }; # endif - check_result_optix(optixDeviceContextCreate(cuda_context, &options, &context)); + check_result_optix(optixDeviceContextCreate(cuContext, &options, &context)); # ifdef WITH_CYCLES_LOGGING check_result_optix(optixDeviceContextSetLogCallback( context, options.logCallbackFunction, options.logCallbackData, options.logCallbackLevel)); @@ -292,37 +242,26 @@ class OptiXDevice : public Device { launch_params.data_elements = sizeof(KernelParams); // Allocate launch parameter buffer memory on device launch_params.alloc_to_device(info.cpu_threads); - - // Create denoiser state entries for all threads (but do not allocate yet) - denoiser_state.resize(info.cpu_threads); } ~OptiXDevice() { // Stop processing any more tasks task_pool.stop(); + // Make CUDA context current + const CUDAContextScope scope(cuContext); + // Free all acceleration structures for (CUdeviceptr mem : as_mem) { cuMemFree(mem); } - // Free denoiser state for all threads - for (const pair<int2, CUdeviceptr> &state : denoiser_state) { - cuMemFree(state.second); - } - sbt_data.free(); texture_info.free(); launch_params.free(); - - // Make CUDA context current - const CUDAContextScope scope(cuda_context); + denoiser_state.free(); // Unload modules - if 
(cuda_module != NULL) - cuModuleUnload(cuda_module); - if (cuda_filter_module != NULL) - cuModuleUnload(cuda_filter_module); if (optix_module != NULL) optixModuleDestroy(optix_module); for (unsigned int i = 0; i < NUM_PIPELINES; ++i) @@ -336,9 +275,7 @@ class OptiXDevice : public Device { if (denoiser != NULL) optixDenoiserDestroy(denoiser); - // Destroy OptiX and CUDA context optixDeviceContextDestroy(context); - cuDevicePrimaryCtxRelease(cuda_device); } private: @@ -354,10 +291,34 @@ class OptiXDevice : public Device { return BVH_LAYOUT_OPTIX; } + string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features, + bool filter, + bool /*split*/) override + { + // Split kernel is not supported in OptiX + string common_cflags = CUDADevice::compile_kernel_get_common_cflags( + requested_features, filter, false); + + // Add OptiX SDK include directory to include paths + const char *optix_sdk_path = getenv("OPTIX_ROOT_DIR"); + if (optix_sdk_path) { + common_cflags += string_printf(" -I\"%s/include\"", optix_sdk_path); + } + + return common_cflags; + } + bool load_kernels(const DeviceRequestedFeatures &requested_features) override { - if (have_error()) - return false; // Abort early if context creation failed already + if (have_error()) { + // Abort early if context creation failed already + return false; + } + + // Load CUDA modules because we need some of the utility kernels + if (!CUDADevice::load_kernels(requested_features)) { + return false; + } // Disable baking for now, since its kernel is not well-suited for inlining and is very slow if (requested_features.use_baking) { @@ -370,7 +331,7 @@ class OptiXDevice : public Device { return false; } - const CUDAContextScope scope(cuda_context); + const CUDAContextScope scope(cuContext); // Unload existing OptiX module and pipelines first if (optix_module != NULL) { @@ -421,9 +382,11 @@ class OptiXDevice : public Device { } { // Load and compile PTX module with OptiX kernels - string ptx_data; - 
const string ptx_filename = "lib/kernel_optix.ptx"; - if (!path_read_text(path_get(ptx_filename), ptx_data)) { + string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx"); + if (use_adaptive_compilation()) { + ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true); + } + if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) { set_error("Failed loading OptiX kernel " + ptx_filename + "."); return false; } @@ -438,34 +401,6 @@ class OptiXDevice : public Device { &optix_module)); } - { // Load CUDA modules because we need some of the utility kernels - int major, minor; - cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, info.num); - cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, info.num); - - if (cuda_module == NULL) { // Avoid reloading module if it was already loaded - string cubin_data; - const string cubin_filename = string_printf("lib/kernel_sm_%d%d.cubin", major, minor); - if (!path_read_text(path_get(cubin_filename), cubin_data)) { - set_error("Failed loading pre-compiled CUDA kernel " + cubin_filename + "."); - return false; - } - - check_result_cuda_ret(cuModuleLoadData(&cuda_module, cubin_data.data())); - } - - if (requested_features.use_denoising && cuda_filter_module == NULL) { - string filter_data; - const string filter_filename = string_printf("lib/filter_sm_%d%d.cubin", major, minor); - if (!path_read_text(path_get(filter_filename), filter_data)) { - set_error("Failed loading pre-compiled CUDA filter kernel " + filter_filename + "."); - return false; - } - - check_result_cuda_ret(cuModuleLoadData(&cuda_filter_module, filter_data.data())); - } - } - // Create program groups OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {}; OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {}; @@ -542,9 +477,9 @@ class OptiXDevice : public Device { // Calculate maximum trace continuation stack size unsigned int trace_css = stack_size[PG_HITD].cssCH; // This is 
based on the maximum of closest-hit and any-hit/intersection programs - trace_css = max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); - trace_css = max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); - trace_css = max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); + trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); + trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); + trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); OptixPipelineLinkOptions link_options; link_options.maxTraceDepth = 1; @@ -613,8 +548,9 @@ class OptiXDevice : public Device { &pipelines[PIP_SHADER_EVAL])); // Calculate continuation stack size based on the maximum of all ray generation stack sizes - const unsigned int css = max(stack_size[PG_BAKE].cssRG, - max(stack_size[PG_DISP].cssRG, stack_size[PG_BACK].cssRG)) + + const unsigned int css = std::max(stack_size[PG_BAKE].cssRG, + std::max(stack_size[PG_DISP].cssRG, + stack_size[PG_BACK].cssRG)) + link_options.maxTraceDepth * trace_css; check_result_optix_ret(optixPipelineSetStackSize( @@ -635,12 +571,17 @@ class OptiXDevice : public Device { return; // Abort early if there was an error previously if (task.type == DeviceTask::RENDER) { + if (thread_index != 0) { + // Only execute denoising in a single thread (see also 'task_add') + task.tile_types &= ~RenderTile::DENOISE; + } + RenderTile tile; - while (task.acquire_tile(this, tile)) { + while (task.acquire_tile(this, tile, task.tile_types)) { if (tile.task == RenderTile::PATH_TRACE) launch_render(task, tile, thread_index); else if (tile.task == RenderTile::DENOISE) - launch_denoise(task, tile, thread_index); + launch_denoise(task, tile); task.release_tile(tile); if (task.get_cancel() && !task.need_finish_queue) break; // User requested cancellation @@ -651,8 +592,21 @@ class OptiXDevice : public Device { else if (task.type == 
DeviceTask::SHADER) { launch_shader_eval(task, thread_index); } - else if (task.type == DeviceTask::FILM_CONVERT) { - launch_film_convert(task, thread_index); + else if (task.type == DeviceTask::DENOISE_BUFFER) { + // Set up a single tile that covers the whole task and denoise it + RenderTile tile; + tile.x = task.x; + tile.y = task.y; + tile.w = task.w; + tile.h = task.h; + tile.buffer = task.buffer; + tile.num_samples = task.num_samples; + tile.start_sample = task.sample; + tile.offset = task.offset; + tile.stride = task.stride; + tile.buffers = task.buffers; + + launch_denoise(task, tile); } } @@ -674,21 +628,24 @@ class OptiXDevice : public Device { const int end_sample = rtile.start_sample + rtile.num_samples; // Keep this number reasonable to avoid running into TDRs - const int step_samples = (info.display_device ? 8 : 32); + int step_samples = (info.display_device ? 8 : 32); + if (task.adaptive_sampling.use) { + step_samples = task.adaptive_sampling.align_static_samples(step_samples); + } + // Offset into launch params buffer so that streams use separate data device_ptr launch_params_ptr = launch_params.device_pointer + thread_index * launch_params.data_elements; - const CUDAContextScope scope(cuda_context); + const CUDAContextScope scope(cuContext); for (int sample = rtile.start_sample; sample < end_sample; sample += step_samples) { // Copy work tile information to device wtile.num_samples = min(step_samples, end_sample - sample); wtile.start_sample = sample; - check_result_cuda(cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParams, tile), - &wtile, - sizeof(wtile), - cuda_stream[thread_index])); + device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile); + check_result_cuda( + cuMemcpyHtoDAsync(d_wtile_ptr, &wtile, sizeof(wtile), cuda_stream[thread_index])); OptixShaderBindingTable sbt_params = {}; sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord); @@ -713,6 +670,12 @@ class OptiXDevice : public Device { 
wtile.h, 1)); + // Run the adaptive sampling kernels at selected samples aligned to step samples. + uint filter_sample = wtile.start_sample + wtile.num_samples - 1; + if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) { + adaptive_sampling_filter(filter_sample, &wtile, d_wtile_ptr, cuda_stream[thread_index]); + } + // Wait for launch to finish check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index])); @@ -724,13 +687,23 @@ class OptiXDevice : public Device { if (task.get_cancel() && !task.need_finish_queue) return; // Cancel rendering } + + // Finalize adaptive sampling + if (task.adaptive_sampling.use) { + device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile); + adaptive_sampling_post(rtile, &wtile, d_wtile_ptr, cuda_stream[thread_index]); + check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index])); + task.update_progress(&rtile, rtile.w * rtile.h * wtile.num_samples); + } } - bool launch_denoise(DeviceTask &task, RenderTile &rtile, int thread_index) + bool launch_denoise(DeviceTask &task, RenderTile &rtile) { - int total_samples = rtile.start_sample + rtile.num_samples; + // Update current sample (for display and NLM denoising task) + rtile.sample = rtile.start_sample + rtile.num_samples; - const CUDAContextScope scope(cuda_context); + // Make CUDA context current now, since it is used for both denoising tasks + const CUDAContextScope scope(cuContext); // Choose between OptiX and NLM denoising if (task.denoising_use_optix) { @@ -742,6 +715,7 @@ class OptiXDevice : public Device { RenderTile rtiles[10]; rtiles[4] = rtile; task.map_neighbor_tiles(rtiles, this); + rtile = rtiles[4]; // Tile may have been modified by mapping code // Calculate size of the tile to denoise (including overlap) int4 rect = make_int4( @@ -808,47 +782,40 @@ class OptiXDevice : public Device { tile_info->y[3] = rtiles[7].y + rtiles[7].h; tile_info_mem.copy_to_device(); - CUfunction filter_copy_func; - 
check_result_cuda_ret(cuModuleGetFunction( - &filter_copy_func, cuda_filter_module, "kernel_cuda_filter_copy_input")); - check_result_cuda_ret(cuFuncSetCacheConfig(filter_copy_func, CU_FUNC_CACHE_PREFER_L1)); - void *args[] = { &input.device_pointer, &tile_info_mem.device_pointer, &rect.x, &task.pass_stride}; - CUDA_GET_BLOCKSIZE(filter_copy_func, rect_size.x, rect_size.y); - CUDA_LAUNCH_KERNEL(filter_copy_func, args); + launch_filter_kernel("kernel_cuda_filter_copy_input", rect_size.x, rect_size.y, args); } # if OPTIX_DENOISER_NO_PIXEL_STRIDE device_only_memory<float> input_rgb(this, "denoiser input rgb"); - { - input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * - task.denoising.optix_input_passes); - - CUfunction convert_to_rgb_func; - check_result_cuda_ret(cuModuleGetFunction( - &convert_to_rgb_func, cuda_filter_module, "kernel_cuda_filter_convert_to_rgb")); - check_result_cuda_ret(cuFuncSetCacheConfig(convert_to_rgb_func, CU_FUNC_CACHE_PREFER_L1)); - - void *args[] = {&input_rgb.device_pointer, - &input_ptr, - &rect_size.x, - &rect_size.y, - &input_stride, - &task.pass_stride, - const_cast<int *>(pass_offset), - &task.denoising.optix_input_passes, - &total_samples}; - CUDA_GET_BLOCKSIZE(convert_to_rgb_func, rect_size.x, rect_size.y); - CUDA_LAUNCH_KERNEL(convert_to_rgb_func, args); - - input_ptr = input_rgb.device_pointer; - pixel_stride = 3 * sizeof(float); - input_stride = rect_size.x * pixel_stride; - } + input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.optix_input_passes); + + void *input_args[] = {&input_rgb.device_pointer, + &input_ptr, + &rect_size.x, + &rect_size.y, + &input_stride, + &task.pass_stride, + const_cast<int *>(pass_offset), + &task.denoising.optix_input_passes, + &rtile.sample}; + launch_filter_kernel( + "kernel_cuda_filter_convert_to_rgb", rect_size.x, rect_size.y, input_args); + + input_ptr = input_rgb.device_pointer; + pixel_stride = 3 * sizeof(float); + input_stride = rect_size.x * pixel_stride; # endif - 
if (denoiser == NULL) { + const bool recreate_denoiser = (denoiser == NULL) || + (task.denoising.optix_input_passes != denoiser_input_passes); + if (recreate_denoiser) { + // Destroy existing handle before creating new one + if (denoiser != NULL) { + optixDenoiserDestroy(denoiser); + } + // Create OptiX denoiser handle on demand when it is first used OptixDenoiserOptions denoiser_options; assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); @@ -858,35 +825,35 @@ class OptiXDevice : public Device { check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); + + // OptiX denoiser handle was created with the requested number of input passes + denoiser_input_passes = task.denoising.optix_input_passes; } OptixDenoiserSizes sizes = {}; check_result_optix_ret( optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes)); - auto &state = denoiser_state[thread_index].second; - auto &state_size = denoiser_state[thread_index].first; const size_t scratch_size = sizes.recommendedScratchSizeInBytes; const size_t scratch_offset = sizes.stateSizeInBytes; // Allocate denoiser state if tile size has changed since last setup - if (state_size.x != rect_size.x || state_size.y != rect_size.y) { - if (state) { - cuMemFree(state); - state = 0; - } - check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size)); + if (recreate_denoiser || (denoiser_state.data_width != rect_size.x || + denoiser_state.data_height != rect_size.y)) { + denoiser_state.alloc_to_device(scratch_offset + scratch_size); + // Initialize denoiser state for the current tile size check_result_optix_ret(optixDenoiserSetup(denoiser, - cuda_stream[thread_index], + 0, rect_size.x, rect_size.y, - state, + denoiser_state.device_pointer, scratch_offset, - state + scratch_offset, + denoiser_state.device_pointer + scratch_offset, 
scratch_size)); - state_size = rect_size; + denoiser_state.data_width = rect_size.x; + denoiser_state.data_height = rect_size.y; } // Set up input and output layer information @@ -926,94 +893,46 @@ class OptiXDevice : public Device { // Finally run denonising OptixDenoiserParams params = {}; // All parameters are disabled/zero check_result_optix_ret(optixDenoiserInvoke(denoiser, - cuda_stream[thread_index], + 0, &params, - state, + denoiser_state.device_pointer, scratch_offset, input_layers, task.denoising.optix_input_passes, overlap_offset.x, overlap_offset.y, output_layers, - state + scratch_offset, + denoiser_state.device_pointer + scratch_offset, scratch_size)); # if OPTIX_DENOISER_NO_PIXEL_STRIDE - { - CUfunction convert_from_rgb_func; - check_result_cuda_ret(cuModuleGetFunction( - &convert_from_rgb_func, cuda_filter_module, "kernel_cuda_filter_convert_from_rgb")); - check_result_cuda_ret( - cuFuncSetCacheConfig(convert_from_rgb_func, CU_FUNC_CACHE_PREFER_L1)); - - void *args[] = {&input_ptr, - &rtiles[9].buffer, - &output_offset.x, - &output_offset.y, - &rect_size.x, - &rect_size.y, - &rtiles[9].x, - &rtiles[9].y, - &rtiles[9].w, - &rtiles[9].h, - &rtiles[9].offset, - &rtiles[9].stride, - &task.pass_stride}; - CUDA_GET_BLOCKSIZE(convert_from_rgb_func, rtiles[9].w, rtiles[9].h); - CUDA_LAUNCH_KERNEL(convert_from_rgb_func, args); - } + void *output_args[] = {&input_ptr, + &rtiles[9].buffer, + &output_offset.x, + &output_offset.y, + &rect_size.x, + &rect_size.y, + &rtiles[9].x, + &rtiles[9].y, + &rtiles[9].w, + &rtiles[9].h, + &rtiles[9].offset, + &rtiles[9].stride, + &task.pass_stride}; + launch_filter_kernel( + "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args); # endif - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); + check_result_cuda_ret(cuStreamSynchronize(0)); task.unmap_neighbor_tiles(rtiles, this); } else { // Run CUDA denoising kernels DenoisingTask denoising(this, task); -
denoising.functions.construct_transform = function_bind( - &OptiXDevice::denoising_construct_transform, this, &denoising, thread_index); - denoising.functions.accumulate = function_bind( - &OptiXDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising, thread_index); - denoising.functions.solve = function_bind( - &OptiXDevice::denoising_solve, this, _1, &denoising, thread_index); - denoising.functions.divide_shadow = function_bind(&OptiXDevice::denoising_divide_shadow, - this, - _1, - _2, - _3, - _4, - _5, - &denoising, - thread_index); - denoising.functions.non_local_means = function_bind( - &OptiXDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising, thread_index); - denoising.functions.combine_halves = function_bind(&OptiXDevice::denoising_combine_halves, - this, - _1, - _2, - _3, - _4, - _5, - _6, - &denoising, - thread_index); - denoising.functions.get_feature = function_bind( - &OptiXDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising, thread_index); - denoising.functions.write_feature = function_bind( - &OptiXDevice::denoising_write_feature, this, _1, _2, _3, &denoising, thread_index); - denoising.functions.detect_outliers = function_bind( - &OptiXDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising, thread_index); - - denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); - denoising.render_buffer.samples = total_samples; - denoising.buffer.gpu_temporary_mem = true; - - denoising.run_denoising(&rtile); + CUDADevice::denoise(rtile, denoising); } - // Update current sample, so it is displayed correctly - rtile.sample = total_samples; // Update task progress after the denoiser completed processing task.update_progress(&rtile, rtile.w * rtile.h); @@ -1028,7 +947,7 @@ class OptiXDevice : public Device { if (task.shader_eval_type == SHADER_EVAL_DISPLACE) rgen_index = PG_DISP; - const CUDAContextScope scope(cuda_context); + const CUDAContextScope scope(cuContext); device_ptr launch_params_ptr = 
launch_params.device_pointer + thread_index * launch_params.data_elements; @@ -1075,62 +994,13 @@ class OptiXDevice : public Device { } } - void launch_film_convert(DeviceTask &task, int thread_index) - { - const CUDAContextScope scope(cuda_context); - - CUfunction film_convert_func; - check_result_cuda(cuModuleGetFunction(&film_convert_func, - cuda_module, - task.rgba_byte ? "kernel_cuda_convert_to_byte" : - "kernel_cuda_convert_to_half_float")); - - float sample_scale = 1.0f / (task.sample + 1); - CUdeviceptr rgba = (task.rgba_byte ? task.rgba_byte : task.rgba_half); - - void *args[] = {&rgba, - &task.buffer, - &sample_scale, - &task.x, - &task.y, - &task.w, - &task.h, - &task.offset, - &task.stride}; - - int threads_per_block; - check_result_cuda(cuFuncGetAttribute( - &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, film_convert_func)); - - const int num_threads_x = (int)sqrt(threads_per_block); - const int num_blocks_x = (task.w + num_threads_x - 1) / num_threads_x; - const int num_threads_y = (int)sqrt(threads_per_block); - const int num_blocks_y = (task.h + num_threads_y - 1) / num_threads_y; - - check_result_cuda(cuLaunchKernel(film_convert_func, - num_blocks_x, - num_blocks_y, - 1, /* blocks */ - num_threads_x, - num_threads_y, - 1, /* threads */ - 0, - cuda_stream[thread_index], - args, - 0)); - - check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index])); - - task.update_progress(NULL); - } - bool build_optix_bvh(const OptixBuildInput &build_input, uint16_t num_motion_steps, OptixTraversableHandle &out_handle) { out_handle = 0; - const CUDAContextScope scope(cuda_context); + const CUDAContextScope scope(cuContext); // Compute memory usage OptixAccelBufferSizes sizes = {}; @@ -1232,8 +1102,8 @@ class OptiXDevice : public Device { assert(bvh->params.top_level); unsigned int num_instances = 0; - unordered_map<Mesh *, vector<OptixTraversableHandle>> meshes; - meshes.reserve(bvh->meshes.size()); + unordered_map<Geometry *, 
OptixTraversableHandle> geometry; + geometry.reserve(bvh->geometry.size()); // Free all previous acceleration structures for (CUdeviceptr mem : as_mem) { @@ -1244,23 +1114,25 @@ class OptiXDevice : public Device { // Build bottom level acceleration structures (BLAS) // Note: Always keep this logic in sync with bvh_optix.cpp! for (Object *ob : bvh->objects) { - // Skip meshes for which acceleration structure already exists - if (meshes.find(ob->mesh) != meshes.end()) + // Skip geometry for which acceleration structure already exists + Geometry *geom = ob->geometry; + if (geometry.find(geom) != geometry.end()) continue; - Mesh *const mesh = ob->mesh; - vector<OptixTraversableHandle> handles; - handles.reserve(2); + if (geom->type == Geometry::HAIR) { + // Build BLAS for curve primitives + Hair *const hair = static_cast<Hair *const>(ob->geometry); + if (hair->num_curves() == 0) { + continue; + } - // Build BLAS for curve primitives - if (bvh->params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) { - const size_t num_curves = mesh->num_curves(); - const size_t num_segments = mesh->num_segments(); + const size_t num_curves = hair->num_curves(); + const size_t num_segments = hair->num_segments(); size_t num_motion_steps = 1; - Attribute *motion_keys = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (motion_blur && mesh->use_motion_blur && motion_keys) { - num_motion_steps = mesh->motion_steps; + Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (motion_blur && hair->use_motion_blur && motion_keys) { + num_motion_steps = hair->motion_steps; } device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY); @@ -1269,21 +1141,21 @@ class OptiXDevice : public Device { // Get AABBs for each motion step for (size_t step = 0; step < num_motion_steps; ++step) { // The center step for motion vertices is not stored in the attribute - const float3 *keys = mesh->curve_keys.data(); + const float3 *keys 
= hair->curve_keys.data(); size_t center_step = (num_motion_steps - 1) / 2; if (step != center_step) { size_t attr_offset = (step > center_step) ? step - 1 : step; // Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4) - keys = motion_keys->data_float3() + attr_offset * mesh->curve_keys.size(); + keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size(); } size_t i = step * num_segments; for (size_t j = 0; j < num_curves; ++j) { - const Mesh::Curve c = mesh->get_curve(j); + const Hair::Curve c = hair->get_curve(j); for (size_t k = 0; k < c.num_segments(); ++i, ++k) { BoundBox bounds = BoundBox::empty; - c.bounds_grow(k, keys, mesh->curve_radius.data(), bounds); + c.bounds_grow(k, keys, hair->curve_radius.data(), bounds); aabb_data[i].minX = bounds.min.x; aabb_data[i].minY = bounds.min.y; @@ -1314,16 +1186,24 @@ class OptiXDevice : public Device { build_input.aabbArray.strideInBytes = sizeof(OptixAabb); build_input.aabbArray.flags = &build_flags; build_input.aabbArray.numSbtRecords = 1; - build_input.aabbArray.primitiveIndexOffset = mesh->prim_offset; + build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; // Allocate memory for new BLAS and build it - handles.emplace_back(); - if (!build_optix_bvh(build_input, num_motion_steps, handles.back())) + OptixTraversableHandle handle; + if (build_optix_bvh(build_input, num_motion_steps, handle)) { + geometry.insert({ob->geometry, handle}); + } + else { return false; + } } + else if (geom->type == Geometry::MESH) { + // Build BLAS for triangle primitives + Mesh *const mesh = static_cast<Mesh *const>(ob->geometry); + if (mesh->num_triangles() == 0) { + continue; + } - // Build BLAS for triangle primitives - if (bvh->params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) { const size_t num_verts = mesh->verts.size(); size_t num_motion_steps = 1; @@ -1378,23 +1258,24 @@ class OptiXDevice : public Device { // buffers for that purpose. 
OptiX does not allow this to be zero though, so just pass in // one and rely on that having the same meaning in this case. build_input.triangleArray.numSbtRecords = 1; - // Triangle primitives are packed right after the curve primitives of this mesh - build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset + mesh->num_segments(); + build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset; // Allocate memory for new BLAS and build it - handles.emplace_back(); - if (!build_optix_bvh(build_input, num_motion_steps, handles.back())) + OptixTraversableHandle handle; + if (build_optix_bvh(build_input, num_motion_steps, handle)) { + geometry.insert({ob->geometry, handle}); + } + else { return false; + } } - - meshes.insert({mesh, handles}); } // Fill instance descriptions device_vector<OptixAabb> aabbs(this, "tlas_aabbs", MEM_READ_ONLY); - aabbs.alloc(bvh->objects.size() * 2); + aabbs.alloc(bvh->objects.size()); device_vector<OptixInstance> instances(this, "tlas_instances", MEM_READ_ONLY); - instances.alloc(bvh->objects.size() * 2); + instances.alloc(bvh->objects.size()); for (Object *ob : bvh->objects) { // Skip non-traceable objects @@ -1402,113 +1283,117 @@ class OptiXDevice : public Device { continue; // Create separate instance for triangle/curve meshes of an object - for (OptixTraversableHandle handle : meshes[ob->mesh]) { - OptixAabb &aabb = aabbs[num_instances]; - aabb.minX = ob->bounds.min.x; - aabb.minY = ob->bounds.min.y; - aabb.minZ = ob->bounds.min.z; - aabb.maxX = ob->bounds.max.x; - aabb.maxY = ob->bounds.max.y; - aabb.maxZ = ob->bounds.max.z; - - OptixInstance &instance = instances[num_instances++]; - memset(&instance, 0, sizeof(instance)); - - // Clear transform to identity matrix - instance.transform[0] = 1.0f; - instance.transform[5] = 1.0f; - instance.transform[10] = 1.0f; - - // Set user instance ID to object index - instance.instanceId = ob->get_device_index(); - - // Volumes have a special bit set in the visibility mask so 
a trace can mask only volumes - // See 'scene_intersect_volume' in bvh.h - instance.visibilityMask = (ob->mesh->has_volume ? 3 : 1); - - // Insert motion traversable if object has motion - if (motion_blur && ob->use_motion()) { - size_t motion_keys = max(ob->motion.size(), 2) - 2; - size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + - motion_keys * sizeof(OptixSRTData); - - const CUDAContextScope scope(cuda_context); - - CUdeviceptr motion_transform_gpu = 0; - check_result_cuda_ret(cuMemAlloc(&motion_transform_gpu, motion_transform_size)); - as_mem.push_back(motion_transform_gpu); - - // Allocate host side memory for motion transform and fill it with transform data - OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>( - new uint8_t[motion_transform_size]); - motion_transform.child = handle; - motion_transform.motionOptions.numKeys = ob->motion.size(); - motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE; - motion_transform.motionOptions.timeBegin = 0.0f; - motion_transform.motionOptions.timeEnd = 1.0f; - - OptixSRTData *const srt_data = motion_transform.srtData; - array<DecomposedTransform> decomp(ob->motion.size()); - transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size()); - - for (size_t i = 0; i < ob->motion.size(); ++i) { - // Scale - srt_data[i].sx = decomp[i].y.w; // scale.x.x - srt_data[i].sy = decomp[i].z.w; // scale.y.y - srt_data[i].sz = decomp[i].w.w; // scale.z.z - - // Shear - srt_data[i].a = decomp[i].z.x; // scale.x.y - srt_data[i].b = decomp[i].z.y; // scale.x.z - srt_data[i].c = decomp[i].w.x; // scale.y.z - assert(decomp[i].z.z == 0.0f); // scale.y.x - assert(decomp[i].w.y == 0.0f); // scale.z.x - assert(decomp[i].w.z == 0.0f); // scale.z.y - - // Pivot point - srt_data[i].pvx = 0.0f; - srt_data[i].pvy = 0.0f; - srt_data[i].pvz = 0.0f; - - // Rotation - srt_data[i].qx = decomp[i].x.x; - srt_data[i].qy = decomp[i].x.y; - srt_data[i].qz = decomp[i].x.z; - 
srt_data[i].qw = decomp[i].x.w; - - // Translation - srt_data[i].tx = decomp[i].y.x; - srt_data[i].ty = decomp[i].y.y; - srt_data[i].tz = decomp[i].y.z; - } + auto handle_it = geometry.find(ob->geometry); + if (handle_it == geometry.end()) { + continue; + } + OptixTraversableHandle handle = handle_it->second; + + OptixAabb &aabb = aabbs[num_instances]; + aabb.minX = ob->bounds.min.x; + aabb.minY = ob->bounds.min.y; + aabb.minZ = ob->bounds.min.z; + aabb.maxX = ob->bounds.max.x; + aabb.maxY = ob->bounds.max.y; + aabb.maxZ = ob->bounds.max.z; + + OptixInstance &instance = instances[num_instances++]; + memset(&instance, 0, sizeof(instance)); + + // Clear transform to identity matrix + instance.transform[0] = 1.0f; + instance.transform[5] = 1.0f; + instance.transform[10] = 1.0f; + + // Set user instance ID to object index + instance.instanceId = ob->get_device_index(); + + // Volumes have a special bit set in the visibility mask so a trace can mask only volumes + // See 'scene_intersect_volume' in bvh.h + instance.visibilityMask = (ob->geometry->has_volume ? 
3 : 1); + + // Insert motion traversable if object has motion + if (motion_blur && ob->use_motion()) { + size_t motion_keys = max(ob->motion.size(), 2) - 2; + size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + + motion_keys * sizeof(OptixSRTData); + + const CUDAContextScope scope(cuContext); + + CUdeviceptr motion_transform_gpu = 0; + check_result_cuda_ret(cuMemAlloc(&motion_transform_gpu, motion_transform_size)); + as_mem.push_back(motion_transform_gpu); + + // Allocate host side memory for motion transform and fill it with transform data + OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>( + new uint8_t[motion_transform_size]); + motion_transform.child = handle; + motion_transform.motionOptions.numKeys = ob->motion.size(); + motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE; + motion_transform.motionOptions.timeBegin = 0.0f; + motion_transform.motionOptions.timeEnd = 1.0f; + + OptixSRTData *const srt_data = motion_transform.srtData; + array<DecomposedTransform> decomp(ob->motion.size()); + transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size()); + + for (size_t i = 0; i < ob->motion.size(); ++i) { + // Scale + srt_data[i].sx = decomp[i].y.w; // scale.x.x + srt_data[i].sy = decomp[i].z.w; // scale.y.y + srt_data[i].sz = decomp[i].w.w; // scale.z.z + + // Shear + srt_data[i].a = decomp[i].z.x; // scale.x.y + srt_data[i].b = decomp[i].z.y; // scale.x.z + srt_data[i].c = decomp[i].w.x; // scale.y.z + assert(decomp[i].z.z == 0.0f); // scale.y.x + assert(decomp[i].w.y == 0.0f); // scale.z.x + assert(decomp[i].w.z == 0.0f); // scale.z.y + + // Pivot point + srt_data[i].pvx = 0.0f; + srt_data[i].pvy = 0.0f; + srt_data[i].pvz = 0.0f; + + // Rotation + srt_data[i].qx = decomp[i].x.x; + srt_data[i].qy = decomp[i].x.y; + srt_data[i].qz = decomp[i].x.z; + srt_data[i].qw = decomp[i].x.w; + + // Translation + srt_data[i].tx = decomp[i].y.x; + srt_data[i].ty = decomp[i].y.y; + srt_data[i].tz 
= decomp[i].y.z; + } - // Upload motion transform to GPU - cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size); - delete[] reinterpret_cast<uint8_t *>(&motion_transform); + // Upload motion transform to GPU + cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size); + delete[] reinterpret_cast<uint8_t *>(&motion_transform); - // Disable instance transform if object uses motion transform already - instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; + // Disable instance transform if object uses motion transform already + instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; + + // Get traversable handle to motion transform + optixConvertPointerToTraversableHandle(context, + motion_transform_gpu, + OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM, + &instance.traversableHandle); + } + else { + instance.traversableHandle = handle; - // Get traversable handle to motion transform - optixConvertPointerToTraversableHandle(context, - motion_transform_gpu, - OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM, - &instance.traversableHandle); + if (ob->geometry->is_instanced()) { + // Set transform matrix + memcpy(instance.transform, &ob->tfm, sizeof(instance.transform)); } else { - instance.traversableHandle = handle; - - if (ob->mesh->is_instanced()) { - // Set transform matrix - memcpy(instance.transform, &ob->tfm, sizeof(instance.transform)); - } - else { - // Disable instance transform if mesh already has it applied to vertex data - instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; - // Non-instanced objects read ID from prim_object, so - // distinguish them from instanced objects with high bit set - instance.instanceId |= 0x800000; - } + // Disable instance transform if geometry already has it applied to vertex data + instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; + // Non-instanced objects read ID from prim_object, so + // distinguish them from instanced objects with high bit set + instance.instanceId |= 0x800000; } 
} } @@ -1530,655 +1415,76 @@ class OptiXDevice : public Device { return build_optix_bvh(build_input, 0, tlas_handle); } - void update_texture_info() - { - if (need_texture_info) { - texture_info.copy_to_device(); - need_texture_info = false; - } - } - - void update_launch_params(const char *name, size_t offset, void *data, size_t data_size) + void const_copy_to(const char *name, void *host, size_t size) override { - const CUDAContextScope scope(cuda_context); - - for (int i = 0; i < info.cpu_threads; ++i) - check_result_cuda( - cuMemcpyHtoD(launch_params.device_pointer + i * launch_params.data_elements + offset, - data, - data_size)); - // Set constant memory for CUDA module - // TODO(pmours): This is only used for tonemapping (see 'launch_film_convert'). + // TODO(pmours): This is only used for tonemapping (see 'film_convert'). // Could be removed by moving those functions to filter CUDA module. - size_t bytes = 0; - CUdeviceptr mem = 0; - check_result_cuda(cuModuleGetGlobal(&mem, &bytes, cuda_module, name)); - assert(mem != 0 && bytes == data_size); - check_result_cuda(cuMemcpyHtoD(mem, data, data_size)); - } - - void mem_alloc(device_memory &mem) override - { - if (mem.type == MEM_PIXELS && !background) { - // Always fall back to no interop for now - // TODO(pmours): Support OpenGL interop when moving CUDA memory management to common code - background = true; - } - else if (mem.type == MEM_TEXTURE) { - assert(!"mem_alloc not supported for textures."); - return; - } - - generic_alloc(mem); - } - - CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0) - { - CUDAContextScope scope(cuda_context); - - CUdeviceptr device_pointer = 0; - size_t size = mem.memory_size() + pitch_padding; - - CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY; - const char *status = ""; - - /* First try allocating in device memory, respecting headroom. We make - * an exception for texture info. It is small and frequently accessed, - * so treat it as working memory. 
- * - * If there is not enough room for working memory, we will try to move - * textures to host memory, assuming the performance impact would have - * been worse for working memory. */ - bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); - bool is_image = is_texture && (mem.data_height > 1); - - size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom; - - size_t total = 0, free = 0; - cuMemGetInfo(&free, &total); + CUDADevice::const_copy_to(name, host, size); - /* Move textures to host memory if needed. */ - if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) { - move_textures_to_host(size + headroom - free, is_texture); - cuMemGetInfo(&free, &total); - } - - /* Allocate in device memory. */ - if (!move_texture_to_host && (size + headroom) < free) { - mem_alloc_result = cuMemAlloc(&device_pointer, size); - if (mem_alloc_result == CUDA_SUCCESS) { - status = " in device memory"; - } - } - - /* Fall back to mapped host memory if needed and possible. */ - void *shared_pointer = 0; - - if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { - if (mem.shared_pointer) { - /* Another device already allocated host memory. */ - mem_alloc_result = CUDA_SUCCESS; - shared_pointer = mem.shared_pointer; - } - else if (map_host_used + size < map_host_limit) { - /* Allocate host memory ourselves. 
*/ - mem_alloc_result = cuMemHostAlloc( - &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED); - - assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) || - (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0)); - } - - if (mem_alloc_result == CUDA_SUCCESS) { - cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0); - map_host_used += size; - status = " in host memory"; - } - else { - status = " failed, out of host memory"; - } - } - else if (mem_alloc_result != CUDA_SUCCESS) { - status = " failed, out of device and host memory"; - } - - if (mem.name) { - VLOG(1) << "Buffer allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. (" - << string_human_readable_size(mem.memory_size()) << ")" << status; - } - - if (mem_alloc_result != CUDA_SUCCESS) { - set_error(string_printf("Buffer allocate %s", status)); - return NULL; - } - - mem.device_pointer = (device_ptr)device_pointer; - mem.device_size = size; - stats.mem_alloc(size); - - if (!mem.device_pointer) { - return NULL; - } - - /* Insert into map of allocations. */ - CUDAMem *cmem = &cuda_mem_map[&mem]; - if (shared_pointer != 0) { - /* Replace host pointer with our host allocation. Only works if - * CUDA memory layout is the same and has no pitch padding. Also - * does not work if we move textures to host during a render, - * since other devices might be using the memory. */ - - if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer && - mem.host_pointer != shared_pointer) { - memcpy(shared_pointer, mem.host_pointer, size); - - /* A call to device_memory::host_free() should be preceded by - * a call to device_memory::device_free() for host memory - * allocated by a device to be handled properly. 
Two exceptions - * are here and a call in CUDADevice::generic_alloc(), where - * the current host memory can be assumed to be allocated by - * device_memory::host_alloc(), not by a device */ - - mem.host_free(); - mem.host_pointer = shared_pointer; - } - mem.shared_pointer = shared_pointer; - mem.shared_counter++; - cmem->use_mapped_host = true; - } - else { - cmem->use_mapped_host = false; - } - - return cmem; - } + if (strcmp(name, "__data") == 0) { + assert(size <= sizeof(KernelData)); - void tex_alloc(device_memory &mem) - { - CUDAContextScope scope(cuda_context); - - /* General variables for both architectures */ - string bind_name = mem.name; - size_t dsize = datatype_size(mem.data_type); - size_t size = mem.memory_size(); - - CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; - switch (mem.extension) { - case EXTENSION_REPEAT: - address_mode = CU_TR_ADDRESS_MODE_WRAP; - break; - case EXTENSION_EXTEND: - address_mode = CU_TR_ADDRESS_MODE_CLAMP; - break; - case EXTENSION_CLIP: - address_mode = CU_TR_ADDRESS_MODE_BORDER; - break; - default: - assert(0); - break; - } + // Fix traversable handle on multi devices + KernelData *const data = (KernelData *)host; + *(OptixTraversableHandle *)&data->bvh.scene = tlas_handle; - CUfilter_mode filter_mode; - if (mem.interpolation == INTERPOLATION_CLOSEST) { - filter_mode = CU_TR_FILTER_MODE_POINT; - } - else { - filter_mode = CU_TR_FILTER_MODE_LINEAR; + update_launch_params(name, offsetof(KernelParams, data), host, size); + return; } - /* Data Storage */ - if (mem.interpolation == INTERPOLATION_NONE) { - generic_alloc(mem); - generic_copy_to(mem); - - // Update data storage pointers in launch parameters + // Update data storage pointers in launch parameters # define KERNEL_TEX(data_type, tex_name) \ - if (strcmp(mem.name, #tex_name) == 0) \ - update_launch_params( \ - mem.name, offsetof(KernelParams, tex_name), &mem.device_pointer, sizeof(device_ptr)); + if (strcmp(name, #tex_name) == 0) { \ + 
update_launch_params(name, offsetof(KernelParams, tex_name), host, size); \ + return; \ + } # include "kernel/kernel_textures.h" # undef KERNEL_TEX - return; - } - - /* Image Texture Storage */ - CUarray_format_enum format; - switch (mem.data_type) { - case TYPE_UCHAR: - format = CU_AD_FORMAT_UNSIGNED_INT8; - break; - case TYPE_UINT16: - format = CU_AD_FORMAT_UNSIGNED_INT16; - break; - case TYPE_UINT: - format = CU_AD_FORMAT_UNSIGNED_INT32; - break; - case TYPE_INT: - format = CU_AD_FORMAT_SIGNED_INT32; - break; - case TYPE_FLOAT: - format = CU_AD_FORMAT_FLOAT; - break; - case TYPE_HALF: - format = CU_AD_FORMAT_HALF; - break; - default: - assert(0); - return; - } - - CUDAMem *cmem = NULL; - CUarray array_3d = NULL; - size_t src_pitch = mem.data_width * dsize * mem.data_elements; - size_t dst_pitch = src_pitch; - - if (mem.data_depth > 1) { - /* 3D texture using array, there is no API for linear memory. */ - CUDA_ARRAY3D_DESCRIPTOR desc; - - desc.Width = mem.data_width; - desc.Height = mem.data_height; - desc.Depth = mem.data_depth; - desc.Format = format; - desc.NumChannels = mem.data_elements; - desc.Flags = 0; - - VLOG(1) << "Array 3D allocate: " << mem.name << ", " - << string_human_readable_number(mem.memory_size()) << " bytes. 
(" - << string_human_readable_size(mem.memory_size()) << ")"; - - check_result_cuda(cuArray3DCreate(&array_3d, &desc)); - - if (!array_3d) { - return; - } - - CUDA_MEMCPY3D param; - memset(&param, 0, sizeof(param)); - param.dstMemoryType = CU_MEMORYTYPE_ARRAY; - param.dstArray = array_3d; - param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = mem.host_pointer; - param.srcPitch = src_pitch; - param.WidthInBytes = param.srcPitch; - param.Height = mem.data_height; - param.Depth = mem.data_depth; - - check_result_cuda(cuMemcpy3D(&param)); - - mem.device_pointer = (device_ptr)array_3d; - mem.device_size = size; - stats.mem_alloc(size); - - cmem = &cuda_mem_map[&mem]; - cmem->texobject = 0; - cmem->array = array_3d; - } - else if (mem.data_height > 0) { - /* 2D texture, using pitch aligned linear memory. */ - int alignment = 0; - check_result_cuda(cuDeviceGetAttribute( - &alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuda_device)); - dst_pitch = align_up(src_pitch, alignment); - size_t dst_size = dst_pitch * mem.data_height; - - cmem = generic_alloc(mem, dst_size - mem.memory_size()); - if (!cmem) { - return; - } - - CUDA_MEMCPY2D param; - memset(&param, 0, sizeof(param)); - param.dstMemoryType = CU_MEMORYTYPE_DEVICE; - param.dstDevice = mem.device_pointer; - param.dstPitch = dst_pitch; - param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = mem.host_pointer; - param.srcPitch = src_pitch; - param.WidthInBytes = param.srcPitch; - param.Height = mem.data_height; - - check_result_cuda(cuMemcpy2DUnaligned(&param)); - } - else { - /* 1D texture, using linear memory. */ - cmem = generic_alloc(mem); - if (!cmem) { - return; - } - - check_result_cuda(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); - } - - /* Kepler+, bindless textures. 
*/ - int flat_slot = 0; - if (string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - flat_slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - - CUDA_RESOURCE_DESC resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - - if (array_3d) { - resDesc.resType = CU_RESOURCE_TYPE_ARRAY; - resDesc.res.array.hArray = array_3d; - resDesc.flags = 0; - } - else if (mem.data_height > 0) { - resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; - resDesc.res.pitch2D.devPtr = mem.device_pointer; - resDesc.res.pitch2D.format = format; - resDesc.res.pitch2D.numChannels = mem.data_elements; - resDesc.res.pitch2D.height = mem.data_height; - resDesc.res.pitch2D.width = mem.data_width; - resDesc.res.pitch2D.pitchInBytes = dst_pitch; - } - else { - resDesc.resType = CU_RESOURCE_TYPE_LINEAR; - resDesc.res.linear.devPtr = mem.device_pointer; - resDesc.res.linear.format = format; - resDesc.res.linear.numChannels = mem.data_elements; - resDesc.res.linear.sizeInBytes = mem.device_size; - } - - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = address_mode; - texDesc.addressMode[1] = address_mode; - texDesc.addressMode[2] = address_mode; - texDesc.filterMode = filter_mode; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - - check_result_cuda(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); - - /* Resize once */ - if (flat_slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount - * of re-allocations. 
*/ - texture_info.resize(flat_slot + 128); - } - - /* Set Mapping and tag that we need to (re-)upload to device */ - TextureInfo &info = texture_info[flat_slot]; - info.data = (uint64_t)cmem->texobject; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; - need_texture_info = true; - } - - void mem_copy_to(device_memory &mem) override - { - if (mem.type == MEM_PIXELS) { - assert(!"mem_copy_to not supported for pixels."); - } - else if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); - } - else { - if (!mem.device_pointer) { - generic_alloc(mem); - } - - generic_copy_to(mem); - } - } - - void generic_copy_to(device_memory &mem) - { - if (mem.host_pointer && mem.device_pointer) { - CUDAContextScope scope(cuda_context); - - /* If use_mapped_host of mem is false, the current device only - * uses device memory allocated by cuMemAlloc regardless of - * mem.host_pointer and mem.shared_pointer, and should copy - * data from mem.host_pointer. 
*/ - - if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) { - check_result_cuda( - cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size())); - } - } - } - - void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override - { - if (mem.type == MEM_PIXELS && !background) { - assert(!"mem_copy_from not supported for pixels."); - } - else if (mem.type == MEM_TEXTURE) { - assert(!"mem_copy_from not supported for textures."); - } - else { - // Calculate linear memory offset and size - const size_t size = elem * w * h; - const size_t offset = elem * y * w; - - if (mem.host_pointer && mem.device_pointer) { - const CUDAContextScope scope(cuda_context); - check_result_cuda(cuMemcpyDtoH( - (char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size)); - } - else if (mem.host_pointer) { - memset((char *)mem.host_pointer + offset, 0, size); - } - } - } - - void mem_zero(device_memory &mem) override - { - if (mem.host_pointer) - memset(mem.host_pointer, 0, mem.memory_size()); - - if (!mem.device_pointer) - mem_alloc(mem); // Need to allocate memory first if it does not exist yet - - /* If use_mapped_host of mem is false, mem.device_pointer currently - * refers to device memory regardless of mem.host_pointer and - * mem.shared_pointer. 
*/ - - if (mem.device_pointer && - (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) { - const CUDAContextScope scope(cuda_context); - check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size())); - } - } - - void mem_free(device_memory &mem) override - { - if (mem.type == MEM_PIXELS && !background) { - assert(!"mem_free not supported for pixels."); - } - else if (mem.type == MEM_TEXTURE) { - tex_free(mem); - } - else { - generic_free(mem); - } - } - - void generic_free(device_memory &mem) - { - if (mem.device_pointer) { - CUDAContextScope scope(cuda_context); - const CUDAMem &cmem = cuda_mem_map[&mem]; - - /* If cmem.use_mapped_host is true, reference counting is used - * to safely free a mapped host memory. */ - - if (cmem.use_mapped_host) { - assert(mem.shared_pointer); - if (mem.shared_pointer) { - assert(mem.shared_counter > 0); - if (--mem.shared_counter == 0) { - if (mem.host_pointer == mem.shared_pointer) { - mem.host_pointer = 0; - } - cuMemFreeHost(mem.shared_pointer); - mem.shared_pointer = 0; - } - } - map_host_used -= mem.device_size; - } - else { - /* Free device memory. */ - cuMemFree(mem.device_pointer); - } - - stats.mem_free(mem.device_size); - mem.device_pointer = 0; - mem.device_size = 0; - - cuda_mem_map.erase(cuda_mem_map.find(&mem)); - } } - void tex_free(device_memory &mem) + void update_launch_params(const char *name, size_t offset, void *data, size_t data_size) { - if (mem.device_pointer) { - CUDAContextScope scope(cuda_context); - const CUDAMem &cmem = cuda_mem_map[&mem]; - - if (cmem.texobject) { - /* Free bindless texture. */ - cuTexObjectDestroy(cmem.texobject); - } + const CUDAContextScope scope(cuContext); - if (cmem.array) { - /* Free array. 
*/ - cuArrayDestroy(cmem.array); - stats.mem_free(mem.device_size); - mem.device_pointer = 0; - mem.device_size = 0; - - cuda_mem_map.erase(cuda_mem_map.find(&mem)); - } - else { - generic_free(mem); - } - } + for (int i = 0; i < info.cpu_threads; ++i) + check_result_cuda( + cuMemcpyHtoD(launch_params.device_pointer + i * launch_params.data_elements + offset, + data, + data_size)); } - void move_textures_to_host(size_t size, bool for_texture) + void task_add(DeviceTask &task) override { - /* Signal to reallocate textures in host memory only. */ - move_texture_to_host = true; - - while (size > 0) { - /* Find suitable memory allocation to move. */ - device_memory *max_mem = NULL; - size_t max_size = 0; - bool max_is_image = false; - - foreach (auto &pair, cuda_mem_map) { - device_memory &mem = *pair.first; - CUDAMem *cmem = &pair.second; - - bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); - bool is_image = is_texture && (mem.data_height > 1); - - /* Can't move this type of memory. */ - if (!is_texture || cmem->array) { - continue; - } - - /* Already in host memory. */ - if (cmem->use_mapped_host) { - continue; - } - - /* For other textures, only move image textures. */ - if (for_texture && !is_image) { - continue; - } - - /* Try to move largest allocation, prefer moving images. */ - if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) { - max_is_image = is_image; - max_size = mem.device_size; - max_mem = &mem; - } + struct OptiXDeviceTask : public DeviceTask { + OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task) + { + // Using task index parameter instead of thread index, since number of CUDA streams may + // differ from number of threads + run = function_bind(&OptiXDevice::thread_run, device, *this, task_index); } + }; - /* Move to host memory. This part is mutex protected since - * multiple CUDA devices could be moving the memory. 
The - * first one will do it, and the rest will adopt the pointer. */ - if (max_mem) { - VLOG(1) << "Move memory from device to host: " << max_mem->name; - - static thread_mutex move_mutex; - thread_scoped_lock lock(move_mutex); - - /* Preserve the original device pointer, in case of multi device - * we can't change it because the pointer mapping would break. */ - device_ptr prev_pointer = max_mem->device_pointer; - size_t prev_size = max_mem->device_size; - - tex_free(*max_mem); - tex_alloc(*max_mem); - size = (max_size >= size) ? 0 : size - max_size; + // Upload texture information to device if it has changed since last launch + load_texture_info(); - max_mem->device_pointer = prev_pointer; - max_mem->device_size = prev_size; - } - else { - break; - } + if (task.type == DeviceTask::FILM_CONVERT) { + // Execute in main thread because of OpenGL access + film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); + return; } - /* Update texture info array with new pointers. */ - update_texture_info(); - - move_texture_to_host = false; - } - - void const_copy_to(const char *name, void *host, size_t size) override - { - if (strcmp(name, "__data") == 0) { - assert(size <= sizeof(KernelData)); - - // Fix traversable handle on multi devices - KernelData *const data = (KernelData *)host; - *(OptixTraversableHandle *)&data->bvh.scene = tlas_handle; - - update_launch_params(name, offsetof(KernelParams, data), host, size); + if (task.type == DeviceTask::DENOISE_BUFFER) { + // Execute denoising in a single thread (e.g. 
to avoid race conditions during creation) + task_pool.push(new OptiXDeviceTask(this, task, 0)); + return; } - } - - device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override - { - return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset)); - } - - void task_add(DeviceTask &task) override - { - // Upload texture information to device if it has changed since last launch - update_texture_info(); // Split task into smaller ones list<DeviceTask> tasks; task.split(tasks, info.cpu_threads); // Queue tasks in internal task pool - struct OptiXDeviceTask : public DeviceTask { - OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task) - { - // Using task index parameter instead of thread index, since number of CUDA streams may - // differ from number of threads - run = function_bind(&OptiXDevice::thread_run, device, *this, task_index); - } - }; - int task_index = 0; for (DeviceTask &task : tasks) task_pool.push(new OptiXDeviceTask(this, task, task_index++)); @@ -2195,403 +1501,6 @@ class OptiXDevice : public Device { // Cancel any remaining tasks in the internal pool task_pool.cancel(); } - - bool denoising_non_local_means(device_ptr image_ptr, - device_ptr guide_ptr, - device_ptr variance_ptr, - device_ptr out_ptr, - DenoisingTask *task, - int thread_index) - { - if (have_error()) - return false; - - int stride = task->buffer.stride; - int w = task->buffer.width; - int h = task->buffer.h; - int r = task->nlm_state.r; - int f = task->nlm_state.f; - float a = task->nlm_state.a; - float k_2 = task->nlm_state.k_2; - - int pass_stride = task->buffer.pass_stride; - int num_shifts = (2 * r + 1) * (2 * r + 1); - int channel_offset = task->nlm_state.is_color ? 
task->buffer.pass_stride : 0; - int frame_offset = 0; - - CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer; - CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; - CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts; - CUdeviceptr scale_ptr = 0; - - check_result_cuda_ret( - cuMemsetD8Async(weightAccum, 0, sizeof(float) * pass_stride, cuda_stream[thread_index])); - check_result_cuda_ret( - cuMemsetD8Async(out_ptr, 0, sizeof(float) * pass_stride, cuda_stream[thread_index])); - - { - CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput; - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMCalcDifference, cuda_filter_module, "kernel_cuda_filter_nlm_calc_difference")); - check_result_cuda_ret( - cuModuleGetFunction(&cuNLMBlur, cuda_filter_module, "kernel_cuda_filter_nlm_blur")); - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMCalcWeight, cuda_filter_module, "kernel_cuda_filter_nlm_calc_weight")); - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMUpdateOutput, cuda_filter_module, "kernel_cuda_filter_nlm_update_output")); - - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1)); - - CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts); - - void *calc_difference_args[] = {&guide_ptr, - &variance_ptr, - &scale_ptr, - &difference, - &w, - &h, - &stride, - &pass_stride, - &r, - &channel_offset, - &frame_offset, - &a, - &k_2}; - void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; - void *calc_weight_args[] = { - &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; - void *update_output_args[] 
= {&blurDifference, - &image_ptr, - &out_ptr, - &weightAccum, - &w, - &h, - &stride, - &pass_stride, - &channel_offset, - &r, - &f}; - - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); - } - - { - CUfunction cuNLMNormalize; - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMNormalize, cuda_filter_module, "kernel_cuda_filter_nlm_normalize")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1)); - void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride}; - CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h); - CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - } - - return !have_error(); - } - - bool denoising_construct_transform(DenoisingTask *task, int thread_index) - { - if (have_error()) - return false; - - CUfunction cuFilterConstructTransform; - check_result_cuda_ret(cuModuleGetFunction(&cuFilterConstructTransform, - cuda_filter_module, - "kernel_cuda_filter_construct_transform")); - check_result_cuda_ret( - cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED)); - CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h); - - void *args[] = {&task->buffer.mem.device_pointer, - &task->tile_info_mem.device_pointer, - &task->storage.transform.device_pointer, - &task->storage.rank.device_pointer, - &task->filter_area, - &task->rect, - &task->radius, - &task->pca_threshold, - &task->buffer.pass_stride, - &task->buffer.frame_stride, - &task->buffer.use_time}; - CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args); - check_result_cuda_ret(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_accumulate(device_ptr color_ptr, - device_ptr color_variance_ptr, - 
device_ptr scale_ptr, - int frame, - DenoisingTask *task, - int thread_index) - { - if (have_error()) - return false; - - int r = task->radius; - int f = 4; - float a = 1.0f; - float k_2 = task->nlm_k_2; - - int w = task->reconstruction_state.source_w; - int h = task->reconstruction_state.source_h; - int stride = task->buffer.stride; - int frame_offset = frame * task->buffer.frame_stride; - int t = task->tile_info->frames[frame]; - - int pass_stride = task->buffer.pass_stride; - int num_shifts = (2 * r + 1) * (2 * r + 1); - - CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer; - CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; - - CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMCalcDifference, cuda_filter_module, "kernel_cuda_filter_nlm_calc_difference")); - check_result_cuda_ret( - cuModuleGetFunction(&cuNLMBlur, cuda_filter_module, "kernel_cuda_filter_nlm_blur")); - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMCalcWeight, cuda_filter_module, "kernel_cuda_filter_nlm_calc_weight")); - check_result_cuda_ret(cuModuleGetFunction( - &cuNLMConstructGramian, cuda_filter_module, "kernel_cuda_filter_nlm_construct_gramian")); - - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); - check_result_cuda_ret(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); - check_result_cuda_ret( - cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED)); - - CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, - task->reconstruction_state.source_w * - task->reconstruction_state.source_h, - num_shifts); - - void *calc_difference_args[] = {&color_ptr, - &color_variance_ptr, - &scale_ptr, - &difference, - &w, - &h, - &stride, - &pass_stride, - &r, - &pass_stride, - &frame_offset, - &a, - &k_2}; - 
void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; - void *calc_weight_args[] = { - &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; - void *construct_gramian_args[] = {&t, - &blurDifference, - &task->buffer.mem.device_pointer, - &task->storage.transform.device_pointer, - &task->storage.rank.device_pointer, - &task->storage.XtWX.device_pointer, - &task->storage.XtWY.device_pointer, - &task->reconstruction_state.filter_window, - &w, - &h, - &stride, - &pass_stride, - &r, - &f, - &frame_offset, - &task->buffer.use_time}; - - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); - CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); - check_result_cuda_ret(cuCtxSynchronize()); - - return !have_error(); - } - - bool denoising_solve(device_ptr output_ptr, DenoisingTask *task, int thread_index) - { - if (have_error()) - return false; - - CUfunction cuFinalize; - check_result_cuda_ret( - cuModuleGetFunction(&cuFinalize, cuda_filter_module, "kernel_cuda_filter_finalize")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1)); - void *finalize_args[] = {&output_ptr, - &task->storage.rank.device_pointer, - &task->storage.XtWX.device_pointer, - &task->storage.XtWY.device_pointer, - &task->filter_area, - &task->reconstruction_state.buffer_params.x, - &task->render_buffer.samples}; - CUDA_GET_BLOCKSIZE( - cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h); - CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - - return !have_error(); - } - - bool denoising_combine_halves(device_ptr a_ptr, - device_ptr b_ptr, - device_ptr mean_ptr, - device_ptr variance_ptr, - int r, - int4 rect, - DenoisingTask *task, - int 
thread_index) - { - if (have_error()) - return false; - - CUfunction cuFilterCombineHalves; - check_result_cuda_ret(cuModuleGetFunction( - &cuFilterCombineHalves, cuda_filter_module, "kernel_cuda_filter_combine_halves")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r}; - CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - - return !have_error(); - } - - bool denoising_divide_shadow(device_ptr a_ptr, - device_ptr b_ptr, - device_ptr sample_variance_ptr, - device_ptr sv_variance_ptr, - device_ptr buffer_variance_ptr, - DenoisingTask *task, - int thread_index) - { - if (have_error()) - return false; - - CUfunction cuFilterDivideShadow; - check_result_cuda_ret(cuModuleGetFunction( - &cuFilterDivideShadow, cuda_filter_module, "kernel_cuda_filter_divide_shadow")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&task->render_buffer.samples, - &task->tile_info_mem.device_pointer, - &a_ptr, - &b_ptr, - &sample_variance_ptr, - &sv_variance_ptr, - &buffer_variance_ptr, - &task->rect, - &task->render_buffer.pass_stride, - &task->render_buffer.offset}; - CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - - return !have_error(); - } - - bool denoising_get_feature(int mean_offset, - int variance_offset, - device_ptr mean_ptr, - device_ptr variance_ptr, - float scale, - DenoisingTask *task, - int thread_index) - { - if (have_error()) - return false; - - CUfunction cuFilterGetFeature; - check_result_cuda_ret(cuModuleGetFunction( - 
&cuFilterGetFeature, cuda_filter_module, "kernel_cuda_filter_get_feature")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&task->render_buffer.samples, - &task->tile_info_mem.device_pointer, - &mean_offset, - &variance_offset, - &mean_ptr, - &variance_ptr, - &scale, - &task->rect, - &task->render_buffer.pass_stride, - &task->render_buffer.offset}; - CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - - return !have_error(); - } - - bool denoising_write_feature(int out_offset, - device_ptr from_ptr, - device_ptr buffer_ptr, - DenoisingTask *task, - int thread_index) - { - if (have_error()) - return false; - - CUfunction cuFilterWriteFeature; - check_result_cuda_ret(cuModuleGetFunction( - &cuFilterWriteFeature, cuda_filter_module, "kernel_cuda_filter_write_feature")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w); - - void *args[] = {&task->render_buffer.samples, - &task->reconstruction_state.buffer_params, - &task->filter_area, - &from_ptr, - &buffer_ptr, - &out_offset, - &task->rect}; - CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - - return !have_error(); - } - - bool denoising_detect_outliers(device_ptr image_ptr, - device_ptr variance_ptr, - device_ptr depth_ptr, - device_ptr output_ptr, - DenoisingTask *task, - int thread_index) - { - if (have_error()) - return false; - - CUfunction cuFilterDetectOutliers; - check_result_cuda_ret(cuModuleGetFunction( - &cuFilterDetectOutliers, cuda_filter_module, "kernel_cuda_filter_detect_outliers")); - check_result_cuda_ret(cuFuncSetCacheConfig(cuFilterDetectOutliers, 
CU_FUNC_CACHE_PREFER_L1)); - CUDA_GET_BLOCKSIZE( - cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y); - - void *args[] = {&image_ptr, - &variance_ptr, - &depth_ptr, - &output_ptr, - &task->rect, - &task->buffer.pass_stride}; - - CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args); - check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index])); - - return !have_error(); - } }; bool device_optix_init() @@ -2603,14 +1512,6 @@ bool device_optix_init() if (!device_cuda_init()) return false; -# ifdef WITH_CUDA_DYNLOAD - // Load NVRTC function pointers for adaptive kernel compilation - if (DebugFlags().cuda.adaptive_compile && cuewInit(CUEW_INIT_NVRTC) != CUEW_SUCCESS) { - VLOG(1) - << "CUEW initialization failed for NVRTC. Adaptive kernel compilation won't be available."; - } -# endif - const OptixResult result = optixInit(); if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) { @@ -2657,7 +1558,7 @@ void device_optix_info(vector<DeviceInfo> &devices) } // Only add devices with RTX support - if (rtcore_version == 0) + if (rtcore_version == 0 && !getenv("CYCLES_OPTIX_TEST")) it = cuda_devices.erase(it); else ++it; diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 42e597a34d7..f22d8761058 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -55,6 +55,10 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) kernel_next_iteration_setup = NULL; kernel_indirect_subsurface = NULL; kernel_buffer_update = NULL; + kernel_adaptive_stopping = NULL; + kernel_adaptive_filter_x = NULL; + kernel_adaptive_filter_y = NULL; + kernel_adaptive_adjust_samples = NULL; } DeviceSplitKernel::~DeviceSplitKernel() @@ -83,6 +87,10 @@ DeviceSplitKernel::~DeviceSplitKernel() delete kernel_next_iteration_setup; delete kernel_indirect_subsurface; delete kernel_buffer_update; + delete kernel_adaptive_stopping; + delete kernel_adaptive_filter_x; + 
delete kernel_adaptive_filter_y; + delete kernel_adaptive_adjust_samples; } bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features) @@ -114,6 +122,10 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_fe LOAD_KERNEL(next_iteration_setup); LOAD_KERNEL(indirect_subsurface); LOAD_KERNEL(buffer_update); + LOAD_KERNEL(adaptive_stopping); + LOAD_KERNEL(adaptive_filter_x); + LOAD_KERNEL(adaptive_filter_y); + LOAD_KERNEL(adaptive_adjust_samples); #undef LOAD_KERNEL @@ -202,13 +214,21 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, /* initial guess to start rolling average */ const int initial_num_samples = 1; /* approx number of samples per second */ - int samples_per_second = (avg_time_per_sample > 0.0) ? - int(double(time_multiplier) / avg_time_per_sample) + 1 : - initial_num_samples; + const int samples_per_second = (avg_time_per_sample > 0.0) ? + int(double(time_multiplier) / avg_time_per_sample) + 1 : + initial_num_samples; RenderTile subtile = tile; subtile.start_sample = tile.sample; - subtile.num_samples = min(samples_per_second, + subtile.num_samples = samples_per_second; + + if (task->adaptive_sampling.use) { + subtile.num_samples = task->adaptive_sampling.align_dynamic_samples(subtile.start_sample, + subtile.num_samples); + } + + /* Don't go beyond requested number of samples. 
*/ + subtile.num_samples = min(subtile.num_samples, tile.start_sample + tile.num_samples - tile.sample); if (device->have_error()) { @@ -302,6 +322,23 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, } } + int filter_sample = tile.sample + subtile.num_samples - 1; + if (task->adaptive_sampling.use && task->adaptive_sampling.need_filter(filter_sample)) { + size_t buffer_size[2]; + buffer_size[0] = round_up(tile.w, local_size[0]); + buffer_size[1] = round_up(tile.h, local_size[1]); + kernel_adaptive_stopping->enqueue( + KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); + buffer_size[0] = round_up(tile.h, local_size[0]); + buffer_size[1] = round_up(1, local_size[1]); + kernel_adaptive_filter_x->enqueue( + KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); + buffer_size[0] = round_up(tile.w, local_size[0]); + buffer_size[1] = round_up(1, local_size[1]); + kernel_adaptive_filter_y->enqueue( + KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); + } + double time_per_sample = ((time_dt() - start_time) / subtile.num_samples); if (avg_time_per_sample == 0.0) { @@ -324,6 +361,28 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, } } + if (task->adaptive_sampling.use) { + /* Reset the start samples. 
*/ + RenderTile subtile = tile; + subtile.start_sample = tile.start_sample; + subtile.num_samples = tile.sample - tile.start_sample; + enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), + subtile, + num_global_elements, + kgbuffer, + kernel_data, + split_data, + ray_state, + queue_index, + use_queues_flag, + work_pool_wgs); + size_t buffer_size[2]; + buffer_size[0] = round_up(tile.w, local_size[0]); + buffer_size[1] = round_up(tile.h, local_size[1]); + kernel_adaptive_adjust_samples->enqueue( + KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); + } + return true; } diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index 6ff326bf214..9d6b9efdd62 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -75,6 +75,10 @@ class DeviceSplitKernel { SplitKernelFunction *kernel_next_iteration_setup; SplitKernelFunction *kernel_indirect_subsurface; SplitKernelFunction *kernel_buffer_update; + SplitKernelFunction *kernel_adaptive_stopping; + SplitKernelFunction *kernel_adaptive_filter_x; + SplitKernelFunction *kernel_adaptive_filter_y; + SplitKernelFunction *kernel_adaptive_adjust_samples; /* Global memory variables [porting]; These memory is used for * co-operation between different kernels; Data written by one diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp index 376ad06a734..c36b1344c3b 100644 --- a/intern/cycles/device/device_task.cpp +++ b/intern/cycles/device/device_task.cpp @@ -115,7 +115,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size) void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples) { - if ((type != RENDER) && (type != SHADER)) + if (type == FILM_CONVERT) return; if (update_progress_sample) { @@ -136,4 +136,59 @@ void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples) } } +/* Adaptive Sampling */ + 
+AdaptiveSampling::AdaptiveSampling() + : use(true), adaptive_step(ADAPTIVE_SAMPLE_STEP), min_samples(0) +{ +} + +/* Render samples in steps that align with the adaptive filtering. */ +int AdaptiveSampling::align_static_samples(int samples) const +{ + if (samples > adaptive_step) { + /* Make multiple of adaptive_step. */ + while (samples % adaptive_step != 0) { + samples--; + } + } + else if (samples < adaptive_step) { + /* Make divisor of adaptive_step. */ + while (adaptive_step % samples != 0) { + samples--; + } + } + + return max(samples, 1); +} + +/* Render samples in steps that align with the adaptive filtering, with the + * suggested number of samples dynamically changing. */ +int AdaptiveSampling::align_dynamic_samples(int offset, int samples) const +{ + /* Round so that we end up on multiples of adaptive_samples. */ + samples += offset; + + if (samples > adaptive_step) { + /* Make multiple of adaptive_step. */ + while (samples % adaptive_step != 0) { + samples--; + } + } + + samples -= offset; + + return max(samples, 1); +} + +bool AdaptiveSampling::need_filter(int sample) const +{ + if (sample > min_samples) { + return (sample & (adaptive_step - 1)) == (adaptive_step - 1); + } + else { + return false; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 1b1e97cdb10..8c4e682adb1 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -47,7 +47,7 @@ class DenoiseParams { int neighbor_frames; /* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */ bool clamp_input; - /* Controls which passes the OptiX AI denoiser should use as input. */ + /* Passes handed over to the OptiX denoiser (default to color + albedo). 
*/ int optix_input_passes; DenoiseParams() @@ -58,13 +58,26 @@ class DenoiseParams { relative_pca = false; neighbor_frames = 2; clamp_input = true; - optix_input_passes = 1; + optix_input_passes = 2; } }; +class AdaptiveSampling { + public: + AdaptiveSampling(); + + int align_static_samples(int samples) const; + int align_dynamic_samples(int offset, int samples) const; + bool need_filter(int sample) const; + + bool use; + int adaptive_step; + int min_samples; +}; + class DeviceTask : public Task { public: - typedef enum { RENDER, FILM_CONVERT, SHADER } Type; + typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type; Type type; int x, y, w, h; @@ -81,7 +94,7 @@ class DeviceTask : public Task { int shader_filter; int shader_x, shader_w; - int passes_size; + RenderBuffers *buffers; explicit DeviceTask(Type type = RENDER); @@ -90,7 +103,7 @@ class DeviceTask : public Task { void update_progress(RenderTile *rtile, int pixel_samples = -1); - function<bool(Device *device, RenderTile &)> acquire_tile; + function<bool(Device *device, RenderTile &, uint)> acquire_tile; function<void(long, int)> update_progress_sample; function<void(RenderTile &)> update_tile_sample; function<void(RenderTile &)> release_tile; @@ -98,6 +111,7 @@ class DeviceTask : public Task { function<void(RenderTile *, Device *)> map_neighbor_tiles; function<void(RenderTile *, Device *)> unmap_neighbor_tiles; + uint tile_types; DenoiseParams denoising; bool denoising_from_render; vector<int> denoising_frames; @@ -114,7 +128,7 @@ class DeviceTask : public Task { bool need_finish_queue; bool integrator_branched; - int2 requested_tile_size; + AdaptiveSampling adaptive_sampling; protected: double last_update_time; diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/device_opencl.h index 61b1e3e3b6b..d6f4fb43061 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/device_opencl.h @@ -88,9 +88,12 @@ class OpenCLInfo { static bool 
device_supported(const string &platform_name, const cl_device_id device_id); static bool platform_version_check(cl_platform_id platform, string *error = NULL); static bool device_version_check(cl_device_id device, string *error = NULL); + static bool get_device_version(cl_device_id device, + int *r_major, + int *r_minor, + string *error = NULL); static string get_hardware_id(const string &platform_name, cl_device_id device_id); - static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, - bool force_all = false); + static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices); /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */ @@ -428,8 +431,10 @@ class OpenCLDevice : public Device { int mem_sub_ptr_alignment(); void const_copy_to(const char *name, void *host, size_t size); - void tex_alloc(device_memory &mem); - void tex_free(device_memory &mem); + void global_alloc(device_memory &mem); + void global_free(device_memory &mem); + void tex_alloc(device_texture &mem); + void tex_free(device_texture &mem); size_t global_size_round_up(int group_size, int global_size); void enqueue_kernel(cl_kernel kernel, @@ -445,6 +450,7 @@ class OpenCLDevice : public Device { device_ptr rgba_byte, device_ptr rgba_half); void shader(DeviceTask &task); + void update_adaptive(DeviceTask &task, RenderTile &tile, int sample); void denoise(RenderTile &tile, DenoisingTask &denoising); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index 76f9ce7a18f..2766f85d17c 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -16,7 +16,7 @@ #ifdef WITH_OPENCL -# include "device/opencl/opencl.h" +# include "device/opencl/device_opencl.h" # include "kernel/kernel_types.h" # include "kernel/split/kernel_split_data_types.h" @@ -56,7 +56,11 @@ static const string SPLIT_BUNDLE_KERNELS = "enqueue_inactive " "next_iteration_setup " 
"indirect_subsurface " - "buffer_update"; + "buffer_update " + "adaptive_stopping " + "adaptive_filter_x " + "adaptive_filter_y " + "adaptive_adjust_samples"; const string OpenCLDevice::get_opencl_program_name(const string &kernel_name) { @@ -253,16 +257,16 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels( /* Ordered with most complex kernels first, to reduce overall compile time. */ ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_PROGRAM(indirect_background); if (requested_features.use_volume || is_preview) { ADD_SPLIT_KERNEL_PROGRAM(do_volume); } + ADD_SPLIT_KERNEL_PROGRAM(shader_eval); + ADD_SPLIT_KERNEL_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao); ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl); ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao); - ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao); - ADD_SPLIT_KERNEL_PROGRAM(lamp_emission); - ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); - ADD_SPLIT_KERNEL_PROGRAM(indirect_background); - ADD_SPLIT_KERNEL_PROGRAM(shader_eval); /* Quick kernels bundled in a single program to reduce overhead of starting * Blender processes. 
*/ @@ -283,6 +287,10 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels( ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y); + ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples); programs.push_back(&program_split); # undef ADD_SPLIT_KERNEL_PROGRAM @@ -605,7 +613,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b kernel_programs(this), preview_programs(this), memory_manager(this), - texture_info(this, "__texture_info", MEM_TEXTURE) + texture_info(this, "__texture_info", MEM_GLOBAL) { cpPlatform = NULL; cdDevice = NULL; @@ -937,7 +945,7 @@ void OpenCLDevice::mem_alloc(device_memory &mem) cl_mem_flags mem_flag; void *mem_ptr = NULL; - if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) + if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) mem_flag = CL_MEM_READ_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -961,9 +969,13 @@ void OpenCLDevice::mem_alloc(device_memory &mem) void OpenCLDevice::mem_copy_to(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); - tex_alloc(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); } else { if (!mem.device_pointer) { @@ -1069,8 +1081,11 @@ void OpenCLDevice::mem_zero(device_memory &mem) void OpenCLDevice::mem_free(device_memory &mem) { - if (mem.type == MEM_TEXTURE) { - tex_free(mem); + if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); } else { if (mem.device_pointer) { @@ -1093,7 +1108,7 @@ int OpenCLDevice::mem_sub_ptr_alignment() device_ptr 
OpenCLDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int size) { cl_mem_flags mem_flag; - if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE) + if (mem.type == MEM_READ_ONLY || mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) mem_flag = CL_MEM_READ_ONLY; else mem_flag = CL_MEM_READ_WRITE; @@ -1133,9 +1148,9 @@ void OpenCLDevice::const_copy_to(const char *name, void *host, size_t size) data->copy_to_device(); } -void OpenCLDevice::tex_alloc(device_memory &mem) +void OpenCLDevice::global_alloc(device_memory &mem) { - VLOG(1) << "Texture allocate: " << mem.name << ", " + VLOG(1) << "Global memory allocate: " << mem.name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -1147,7 +1162,7 @@ void OpenCLDevice::tex_alloc(device_memory &mem) textures_need_update = true; } -void OpenCLDevice::tex_free(device_memory &mem) +void OpenCLDevice::global_free(device_memory &mem) { if (mem.device_pointer) { mem.device_pointer = 0; @@ -1165,6 +1180,25 @@ void OpenCLDevice::tex_free(device_memory &mem) } } +void OpenCLDevice::tex_alloc(device_texture &mem) +{ + VLOG(1) << "Texture allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + + memory_manager.alloc(mem.name, mem); + /* Set the pointer to non-null to keep code that inspects its value from thinking its + * unallocated. 
*/ + mem.device_pointer = 1; + textures[mem.name] = &mem; + textures_need_update = true; +} + +void OpenCLDevice::tex_free(device_texture &mem) +{ + global_free(mem); +} + size_t OpenCLDevice::global_size_round_up(int group_size, int global_size) { int r = global_size % group_size; @@ -1265,10 +1299,10 @@ void OpenCLDevice::flush_texture_buffers() foreach (TexturesMap::value_type &tex, textures) { string name = tex.first; + device_memory *mem = tex.second; - if (string_startswith(name, "__tex_image")) { - int pos = name.rfind("_"); - int id = atoi(name.data() + pos + 1); + if (mem->type == MEM_TEXTURE) { + const uint id = ((device_texture *)mem)->slot; texture_slots.push_back(texture_slot_t(name, num_data_slots + id)); num_slots = max(num_slots, num_data_slots + id + 1); } @@ -1281,22 +1315,20 @@ void OpenCLDevice::flush_texture_buffers() /* Fill in descriptors */ foreach (texture_slot_t &slot, texture_slots) { + device_memory *mem = textures[slot.name]; TextureInfo &info = texture_info[slot.slot]; MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name); - info.data = desc.offset; - info.cl_buffer = desc.device_buffer; - - if (string_startswith(slot.name, "__tex_image")) { - device_memory *mem = textures[slot.name]; - info.width = mem->data_width; - info.height = mem->data_height; - info.depth = mem->data_depth; - - info.interpolation = mem->interpolation; - info.extension = mem->extension; + if (mem->type == MEM_TEXTURE) { + info = ((device_texture *)mem)->info; + } + else { + memset(&info, 0, sizeof(TextureInfo)); } + + info.data = desc.offset; + info.cl_buffer = desc.device_buffer; } /* Force write of descriptors. 
*/ @@ -1308,13 +1340,7 @@ void OpenCLDevice::thread_run(DeviceTask *task) { flush_texture_buffers(); - if (task->type == DeviceTask::FILM_CONVERT) { - film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); - } - else if (task->type == DeviceTask::SHADER) { - shader(*task); - } - else if (task->type == DeviceTask::RENDER) { + if (task->type == DeviceTask::RENDER) { RenderTile tile; DenoisingTask denoising(this, *task); @@ -1323,7 +1349,7 @@ void OpenCLDevice::thread_run(DeviceTask *task) kgbuffer.alloc_to_device(1); /* Keep rendering tiles until done. */ - while (task->acquire_tile(this, tile)) { + while (task->acquire_tile(this, tile, task->tile_types)) { if (tile.task == RenderTile::PATH_TRACE) { assert(tile.task == RenderTile::PATH_TRACE); scoped_timer timer(&tile.buffers->render_time); @@ -1352,6 +1378,30 @@ void OpenCLDevice::thread_run(DeviceTask *task) kgbuffer.free(); } + else if (task->type == DeviceTask::SHADER) { + shader(*task); + } + else if (task->type == DeviceTask::FILM_CONVERT) { + film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); + } + else if (task->type == DeviceTask::DENOISE_BUFFER) { + RenderTile tile; + tile.x = task->x; + tile.y = task->y; + tile.w = task->w; + tile.h = task->h; + tile.buffer = task->buffer; + tile.sample = task->sample + task->num_samples; + tile.num_samples = task->num_samples; + tile.start_sample = task->sample; + tile.offset = task->offset; + tile.stride = task->stride; + tile.buffers = task->buffers; + + DenoisingTask denoising(this, *task); + denoise(tile, denoising); + task->update_progress(&tile, tile.w * tile.h); + } } void OpenCLDevice::film_convert(DeviceTask &task, @@ -1846,6 +1896,17 @@ string OpenCLDevice::kernel_build_options(const string *debug_src) { string build_options = "-cl-no-signed-zeros -cl-mad-enable "; + /* Build with OpenCL 2.0 if available, this improves performance + * with AMD OpenCL drivers on Windows and Linux (legacy drivers). 
+ * Note that OpenCL selects the highest 1.x version by default, + * only for 2.0 do we need the explicit compiler flag. */ + int version_major, version_minor; + if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) { + if (version_major >= 2) { + build_options += "-cl-std=CL2.0 "; + } + } + if (platform_name == "NVIDIA CUDA") { build_options += "-D__KERNEL_OPENCL_NVIDIA__ " diff --git a/intern/cycles/device/opencl/memory_manager.cpp b/intern/cycles/device/opencl/memory_manager.cpp index 06d4746a86e..fedb3ea8c6a 100644 --- a/intern/cycles/device/opencl/memory_manager.cpp +++ b/intern/cycles/device/opencl/memory_manager.cpp @@ -18,7 +18,7 @@ # include "util/util_foreach.h" -# include "device/opencl/opencl.h" +# include "device/opencl/device_opencl.h" # include "device/opencl/memory_manager.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/opencl/memory_manager.h b/intern/cycles/device/opencl/memory_manager.h index 2fbc97a0756..23624f837a6 100644 --- a/intern/cycles/device/opencl/memory_manager.h +++ b/intern/cycles/device/opencl/memory_manager.h @@ -19,8 +19,8 @@ #include "device/device.h" #include "util/util_map.h" -#include "util/util_vector.h" #include "util/util_string.h" +#include "util/util_vector.h" #include "clew.h" diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index 3eeff31f8c2..b8b07cf2947 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -16,15 +16,16 @@ #ifdef WITH_OPENCL -# include "device/opencl/opencl.h" # include "device/device_intern.h" +# include "device/opencl/device_opencl.h" # include "util/util_debug.h" # include "util/util_logging.h" # include "util/util_md5.h" # include "util/util_path.h" -# include "util/util_time.h" +# include "util/util_semaphore.h" # include "util/util_system.h" +# include "util/util_time.h" using std::cerr; using std::endl; @@ -390,8 +391,27 @@ static void 
escape_python_string(string &str) string_replace(str, "'", "\'"); } +static int opencl_compile_process_limit() +{ + /* Limit number of concurrent processes compiling, with a heuristic based + * on total physical RAM and estimate of memory usage needed when compiling + * with all Cycles features enabled. + * + * This is somewhat arbitrary as we don't know the actual available RAM or + * how much the kernel compilation will needed depending on the features, but + * better than not limiting at all. */ + static const int64_t GB = 1024LL * 1024LL * 1024LL; + static const int64_t process_memory = 2 * GB; + static const int64_t base_memory = 2 * GB; + static const int64_t system_memory = system_physical_ram(); + static const int64_t process_limit = (system_memory - base_memory) / process_memory; + + return max((int)process_limit, 1); +} + bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin) { + /* Construct arguments. */ vector<string> args; args.push_back("--background"); args.push_back("--factory-startup"); @@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin) kernel_file_escaped.c_str(), clbin_escaped.c_str())); - double starttime = time_dt(); + /* Limit number of concurrent processes compiling. */ + static thread_counting_semaphore semaphore(opencl_compile_process_limit()); + semaphore.acquire(); + + /* Compile. 
*/ + const double starttime = time_dt(); add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); add_log(string("Build flags: ") + kernel_build_options, true); - if (!system_call_self(args) || !path_exists(clbin)) { + const bool success = system_call_self(args); + const double elapsed = time_dt() - starttime; + + semaphore.release(); + + if (!success || !path_exists(clbin)) { return false; } - double elapsed = time_dt() - starttime; add_log( string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false); @@ -747,6 +776,10 @@ bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_i } VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor; + if (getenv("CYCLES_OPENCL_TEST")) { + return true; + } + /* It is possible to have Iris GPU on AMD/Apple OpenCL framework * (aka, it will not be on Intel framework). This isn't supported * and needs an explicit blacklist. @@ -806,18 +839,30 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error) return true; } -bool OpenCLInfo::device_version_check(cl_device_id device, string *error) +bool OpenCLInfo::get_device_version(cl_device_id device, int *r_major, int *r_minor, string *error) { - const int req_major = 1, req_minor = 1; - int major, minor; char version[256]; clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL); - if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) { + if (sscanf(version, "OpenCL C %d.%d", r_major, r_minor) < 2) { if (error != NULL) { *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version); } return false; } + if (error != NULL) { + *error = ""; + } + return true; +} + +bool OpenCLInfo::device_version_check(cl_device_id device, string *error) +{ + const int req_major = 1, req_minor = 1; + int major, minor; + if (!get_device_version(device, &major, &minor, error)) { + return false; + } + if (!((major == 
req_major && minor >= req_minor) || (major > req_major))) { if (error != NULL) { *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor); @@ -858,7 +903,7 @@ string OpenCLInfo::get_hardware_id(const string &platform_name, cl_device_id dev return ""; } -void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, bool force_all) +void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices) { const cl_device_type device_type = OpenCLInfo::device_type(); static bool first_time = true; @@ -924,7 +969,7 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version."; continue; } - if (force_all || device_supported(platform_name, device_id)) { + if (device_supported(platform_name, device_id)) { cl_device_type device_type; if (!get_device_type(device_id, &device_type, &error)) { FIRST_VLOG(2) << "Ignoring device " << device_name diff --git a/intern/cycles/graph/node.cpp b/intern/cycles/graph/node.cpp index 4f79a7518dc..1439fb5a407 100644 --- a/intern/cycles/graph/node.cpp +++ b/intern/cycles/graph/node.cpp @@ -669,4 +669,14 @@ size_t Node::get_total_size_in_bytes() const return total_size; } +bool Node::is_a(const NodeType *type_) +{ + for (const NodeType *base = type; base; base = base->base) { + if (base == type_) { + return true; + } + } + return false; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/graph/node.h b/intern/cycles/graph/node.h index d35a1bb489c..4473b8aca28 100644 --- a/intern/cycles/graph/node.h +++ b/intern/cycles/graph/node.h @@ -94,6 +94,9 @@ struct Node { /* Get total size of this node. */ size_t get_total_size_in_bytes() const; + /* Type testing, taking into account base classes. 
*/ + bool is_a(const NodeType *type); + ustring name; const NodeType *type; }; diff --git a/intern/cycles/graph/node_type.cpp b/intern/cycles/graph/node_type.cpp index f46d4e48026..0283ed7c817 100644 --- a/intern/cycles/graph/node_type.cpp +++ b/intern/cycles/graph/node_type.cpp @@ -135,8 +135,13 @@ bool SocketType::is_float3(Type type) /* Node Type */ -NodeType::NodeType(Type type_) : type(type_) +NodeType::NodeType(Type type, const NodeType *base) : type(type), base(base) { + if (base) { + /* Inherit sockets. */ + inputs = base->inputs; + outputs = base->outputs; + } } NodeType::~NodeType() @@ -209,7 +214,7 @@ unordered_map<ustring, NodeType, ustringHash> &NodeType::types() return _types; } -NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_) +NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_, const NodeType *base_) { ustring name(name_); @@ -219,7 +224,7 @@ NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_) return NULL; } - types()[name] = NodeType(type_); + types()[name] = NodeType(type_, base_); NodeType *type = &types()[name]; type->name = name; diff --git a/intern/cycles/graph/node_type.h b/intern/cycles/graph/node_type.h index e9496a42658..a79d44b82f3 100644 --- a/intern/cycles/graph/node_type.h +++ b/intern/cycles/graph/node_type.h @@ -103,7 +103,7 @@ struct SocketType { struct NodeType { enum Type { NONE, SHADER }; - explicit NodeType(Type type = NONE); + explicit NodeType(Type type = NONE, const NodeType *base = NULL); ~NodeType(); void register_input(ustring name, @@ -124,11 +124,15 @@ struct NodeType { ustring name; Type type; + const NodeType *base; vector<SocketType, std::allocator<SocketType>> inputs; vector<SocketType, std::allocator<SocketType>> outputs; CreateFunc create; - static NodeType *add(const char *name, CreateFunc create, Type type = NONE); + static NodeType *add(const char *name, + CreateFunc create, + Type type = NONE, + const NodeType *base = NULL); static const 
NodeType *find(ustring name); static unordered_map<ustring, NodeType, ustringHash> &types(); }; @@ -148,6 +152,14 @@ struct NodeType { } \ template<typename T> const NodeType *structname::register_type() +#define NODE_ABSTRACT_DECLARE \ + template<typename T> static const NodeType *register_base_type(); \ + static const NodeType *node_base_type; + +#define NODE_ABSTRACT_DEFINE(structname) \ + const NodeType *structname::node_base_type = structname::register_base_type<structname>(); \ + template<typename T> const NodeType *structname::register_base_type() + /* Sock Definition Macros */ #define SOCKET_OFFSETOF(T, name) (((char *)&(((T *)1)->name)) - (char *)1) diff --git a/intern/cycles/graph/node_xml.cpp b/intern/cycles/graph/node_xml.cpp index a96970cc904..d333400cc4a 100644 --- a/intern/cycles/graph/node_xml.cpp +++ b/intern/cycles/graph/node_xml.cpp @@ -200,7 +200,7 @@ void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node) map<ustring, Node *>::iterator it = reader.node_map.find(value); if (it != reader.node_map.end()) { Node *value_node = it->second; - if (value_node->type == *(socket.node_type)) + if (value_node->is_a(*(socket.node_type))) node->set(socket, it->second); } break; @@ -215,7 +215,7 @@ void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node) map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i])); if (it != reader.node_map.end()) { Node *value_node = it->second; - value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL; + value[i] = (value_node->is_a(*(socket.node_type))) ? 
value_node : NULL; } else { value[i] = NULL; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 99172f30b8b..3264b5afea2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -36,6 +36,10 @@ set(SRC_CUDA_KERNELS ) set(SRC_OPENCL_KERNELS + kernels/opencl/kernel_adaptive_stopping.cl + kernels/opencl/kernel_adaptive_filter_x.cl + kernels/opencl/kernel_adaptive_filter_y.cl + kernels/opencl/kernel_adaptive_adjust_samples.cl kernels/opencl/kernel_bake.cl kernels/opencl/kernel_base.cl kernels/opencl/kernel_displace.cl @@ -94,6 +98,7 @@ set(SRC_BVH_HEADERS set(SRC_HEADERS kernel_accumulate.h + kernel_adaptive_sampling.h kernel_bake.h kernel_camera.h kernel_color.h @@ -228,6 +233,7 @@ set(SRC_SVM_HEADERS svm/svm_fractal_noise.h svm/svm_types.h svm/svm_value.h + svm/svm_vector_rotate.h svm/svm_vector_transform.h svm/svm_voronoi.h svm/svm_voxel.h @@ -323,6 +329,10 @@ set(SRC_UTIL_HEADERS ) set(SRC_SPLIT_HEADERS + split/kernel_adaptive_adjust_samples.h + split/kernel_adaptive_filter_x.h + split/kernel_adaptive_filter_y.h + split/kernel_adaptive_stopping.h split/kernel_branched.h split/kernel_buffer_update.h split/kernel_data_init.h @@ -442,7 +452,7 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() add_custom_command( - OUTPUT ${cuda_cubin} + OUTPUT ${cuda_file} COMMAND ${CUBIN_CC_ENV} "$<TARGET_FILE:cycles_cubin_cc>" -target ${CUDA_ARCH} @@ -451,7 +461,6 @@ if(WITH_CYCLES_CUDA_BINARIES) -v -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" DEPENDS ${kernel_sources} cycles_cubin_cc) - set(cuda_file ${cuda_cubin}) else() add_custom_command( OUTPUT ${cuda_file} @@ -507,7 +516,6 @@ if(WITH_CYCLES_DEVICE_OPTIX) -I "${OPTIX_INCLUDE_DIR}" -I "${CMAKE_CURRENT_SOURCE_DIR}/.." 
-I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda" - -arch=sm_30 --use_fast_math -o ${output}) @@ -515,25 +523,62 @@ if(WITH_CYCLES_DEVICE_OPTIX) set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) endif() + if(WITH_CYCLES_CUBIN_COMPILER) - add_custom_command( - OUTPUT - ${output} - DEPENDS - ${input} - ${SRC_HEADERS} - ${SRC_KERNELS_CUDA_HEADERS} - ${SRC_KERNELS_OPTIX_HEADERS} - ${SRC_BVH_HEADERS} - ${SRC_SVM_HEADERS} - ${SRC_GEOM_HEADERS} - ${SRC_CLOSURE_HEADERS} - ${SRC_UTIL_HEADERS} - COMMAND - ${CUDA_NVCC_EXECUTABLE} --ptx ${cuda_flags} ${input} - WORKING_DIRECTORY - "${CMAKE_CURRENT_SOURCE_DIR}") + # Needed to find libnvrtc-builtins.so. Can't do it from inside + # cycles_cubin_cc since the env variable is read before main() + if(APPLE) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") + elseif(UNIX) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + add_custom_command( + OUTPUT ${output} + DEPENDS + ${input} + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPTIX_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_UTIL_HEADERS} + COMMAND ${CUBIN_CC_ENV} + "$<TARGET_FILE:cycles_cubin_cc>" + -target 30 + -ptx + -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} + ${cuda_flags} + -v + -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" + DEPENDS ${kernel_sources} cycles_cubin_cc) + else() + add_custom_command( + OUTPUT + ${output} + DEPENDS + ${input} + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPTIX_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_UTIL_HEADERS} + COMMAND + ${CUDA_NVCC_EXECUTABLE} + --ptx + -arch=sm_30 + ${cuda_flags} + ${input} + WORKING_DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}") + endif() list(APPEND optix_ptx ${output}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib) diff --git 
a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 8e17ab9af7a..b3992c03a9a 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -336,7 +336,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, ctx.lcg_state = lcg_state; ctx.max_hits = max_hits; ctx.local_isect = local_isect; - local_isect->num_hits = 0; + if (local_isect) { + local_isect->num_hits = 0; + } ctx.local_object_id = local_object; IntersectContext rtc_ctx(&ctx); RTCRay rtc_ray; @@ -373,7 +375,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); } - return local_isect->num_hits > 0; + /* rtcOccluded1 sets tfar to -inf if a hit was found. */ + return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0); + ; } # endif /* __EMBREE__ */ @@ -439,7 +443,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, ctx.num_hits = 0; IntersectContext rtc_ctx(&ctx); RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW); + kernel_embree_setup_ray(*ray, rtc_ray, visibility); rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); if (ctx.num_hits > max_hits) { diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h index ffea7d37440..ca637288bee 100644 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ b/intern/cycles/kernel/bvh/bvh_embree.h @@ -17,9 +17,12 @@ #include <embree3/rtcore_ray.h> #include <embree3/rtcore_scene.h> +// clang-format off #include "kernel/kernel_compat_cpu.h" #include "kernel/split/kernel_split_data_types.h" #include "kernel/kernel_globals.h" +// clang-format on + #include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index b282bf5a350..0a9631ad931 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -14,6 +14,7 @@ * limitations 
under the License. */ +// clang-format off #include "kernel/closure/bsdf_ashikhmin_velvet.h" #include "kernel/closure/bsdf_diffuse.h" #include "kernel/closure/bsdf_oren_nayar.h" @@ -32,6 +33,7 @@ #include "kernel/closure/bsdf_principled_sheen.h" #include "kernel/closure/bssrdf.h" #include "kernel/closure/volume.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index 4db5a6cc830..f78bbeb5d9d 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -493,6 +493,36 @@ ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness) bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness); } +/* Hair Albedo */ + +ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale( + const float azimuthal_roughness) +{ + const float x = azimuthal_roughness; + return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f; +} + +ccl_device float3 bsdf_principled_hair_albedo(ShaderClosure *sc) +{ + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc; + return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v)); +} + +ccl_device_inline float3 +bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness) +{ + const float3 sigma = log3(color) / + bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness); + return sigma * sigma; +} + +ccl_device_inline float3 bsdf_principled_hair_sigma_from_concentration(const float eumelanin, + const float pheomelanin) +{ + return eumelanin * make_float3(0.506f, 0.841f, 1.653f) + + pheomelanin * make_float3(0.343f, 0.733f, 1.924f); +} + CCL_NAMESPACE_END #endif /* __BSDF_HAIR_PRINCIPLED_H__ */ diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h index 7bbd17066fd..59d4ace2bef 100644 --- 
a/intern/cycles/kernel/filter/filter_features_sse.h +++ b/intern/cycles/kernel/filter/filter_features_sse.h @@ -109,7 +109,6 @@ ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time) scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f))); if (use_time) { scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f))); - ; } scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f))); scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f))); diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index e81c1b781c8..5ff4d5f7053 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -14,6 +14,7 @@ * limitations under the License. */ +// clang-format off #include "kernel/geom/geom_attribute.h" #include "kernel/geom/geom_object.h" #ifdef __PATCH_EVAL__ @@ -30,3 +31,4 @@ #include "kernel/geom/geom_curve_intersect.h" #include "kernel/geom/geom_volume.h" #include "kernel/geom/geom_primitive.h" +// clang-format on diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index 456608bfa22..e1b0e6fb81c 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -29,17 +29,11 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData * ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd) { -#ifdef __HAIR__ - if (sd->type & PRIMITIVE_ALL_CURVE) { - return ATTR_PRIM_CURVE; - } - else -#endif - if (subd_triangle_patch(kg, sd) != ~0) { + if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) { return ATTR_PRIM_SUBD; } else { - return ATTR_PRIM_TRIANGLE; + return ATTR_PRIM_GEOMETRY; } } diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index e0aacb434eb..928cad58452 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ 
b/intern/cycles/kernel/geom/geom_curve.h @@ -83,6 +83,16 @@ ccl_device float curve_attribute_float( return (1.0f - sd->u) * f0 + sd->u * f1; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +# endif + + return kernel_tex_fetch(__attributes_float, desc.offset); + } else { # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -133,6 +143,16 @@ ccl_device float2 curve_attribute_float2(KernelGlobals *kg, return (1.0f - sd->u) * f0 + sd->u * f1; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return kernel_tex_fetch(__attributes_float2, desc.offset); + } else { # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -183,6 +203,16 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, return (1.0f - sd->u) * f0 + sd->u * f1; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset)); + } else { # ifdef __RAY_DIFFERENTIALS__ if (dx) diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 7380c506bf4..0e2a00e9d2e 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -36,7 +36,7 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, * zero iterations and rendering is really slow with motion curves. For until other * areas are speed up it's probably not so crucial to optimize this out. 
*/ - uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE; + uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY; uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); while (attr_map.x != id) { diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index 49d4829af38..859d919f0bb 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -103,17 +103,21 @@ ccl_device_inline const Ray *ray, float3 verts[3]) { +# ifdef __KERNEL_OPTIX__ + /* isect->t is always in world space with OptiX. */ + return motion_triangle_refine(kg, sd, isect, ray, verts); +# else float3 P = ray->P; float3 D = ray->D; float t = isect->t; -# ifdef __INTERSECTION_REFINE__ +# ifdef __INTERSECTION_REFINE__ if (isect->object != OBJECT_NONE) { -# ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_itfm; -# else +# else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -# endif +# endif P = transform_point(&tfm, P); D = transform_direction(&tfm, D); @@ -135,19 +139,20 @@ ccl_device_inline P = P + D * rt; if (isect->object != OBJECT_NONE) { -# ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_tfm; -# else +# else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -# endif +# endif P = transform_point(&tfm, P); } return P; -# else /* __INTERSECTION_REFINE__ */ +# else /* __INTERSECTION_REFINE__ */ return P + D * t; -# endif /* __INTERSECTION_REFINE__ */ +# endif /* __INTERSECTION_REFINE__ */ +# endif } #endif /* __BVH_LOCAL__ */ diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index af4e6fbd89b..3aa68e1f84e 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -81,13 +81,7 @@ 
ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; Transform tfm; -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time); - } - else -# endif - transform_motion_array_interpolate(&tfm, motion, num_steps, time); + transform_motion_array_interpolate(&tfm, motion, num_steps, time); return tfm; } @@ -326,6 +320,26 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) return kernel_tex_fetch(__objects, object).patch_map_offset; } +/* Volume step size */ + +ccl_device_inline float object_volume_density(KernelGlobals *kg, int object) +{ + if (object == OBJECT_NONE) { + return 1.0f; + } + + return kernel_tex_fetch(__objects, object).surface_area; +} + +ccl_device_inline float object_volume_step_size(KernelGlobals *kg, int object) +{ + if (object == OBJECT_NONE) { + return kernel_data.background.volume_step_size; + } + + return kernel_tex_fetch(__object_volume_step, object); +} + /* Pass ID for shader */ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index 81bac6e6ee1..3eef9857ae3 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -217,6 +217,14 @@ ccl_device_noinline float subd_triangle_attribute_float( return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset); + } else { if (dx) *dx = 0.0f; @@ -352,6 +360,14 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; } + else if (desc.element == 
ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float2, desc.offset); + } else { if (dx) *dx = make_float2(0.0f, 0.0f); @@ -486,6 +502,14 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset)); + } else { if (dx) *dx = make_float3(0.0f, 0.0f, 0.0f); @@ -584,6 +608,14 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals *kg, return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + return color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, desc.offset)); + } else { if (dx) *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index fdb7f655f64..a2731bf2bd0 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -153,6 +153,14 @@ ccl_device float triangle_attribute_float( return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset); + } else { if (dx) *dx = 0.0f; @@ -212,6 +220,14 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; } + else if 
(desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float2, desc.offset); + } else { if (dx) *dx = make_float2(0.0f, 0.0f); @@ -272,6 +288,14 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset)); + } else { if (dx) *dx = make_float3(0.0f, 0.0f, 0.0f); @@ -304,6 +328,14 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals *kg, return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + return color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, desc.offset)); + } else { if (dx) *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 68075199402..6604806f73b 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -690,16 +690,20 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { +#ifdef __KERNEL_OPTIX__ + /* isect->t is always in world space with OptiX. 
*/ + return triangle_refine(kg, sd, isect, ray); +#else float3 P = ray->P; float3 D = ray->D; float t = isect->t; if (isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_itfm; -#else +# else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif +# endif P = transform_point(&tfm, P); D = transform_direction(&tfm, D); @@ -708,7 +712,7 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg, P = P + D * t; -#ifdef __INTERSECTION_REFINE__ +# ifdef __INTERSECTION_REFINE__ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), @@ -728,19 +732,20 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg, float rt = dot(edge2, qvec) / det; P = P + D * rt; } -#endif /* __INTERSECTION_REFINE__ */ +# endif /* __INTERSECTION_REFINE__ */ if (isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_tfm; -#else +# else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif +# endif P = transform_point(&tfm, P); } return P; +#endif } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 96cf35a40dc..f43a7841b46 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -51,10 +51,14 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { - float3 P = volume_normalized_position(kg, sd, sd->P); + /* todo: optimize this so we don't have to transform both here and in + * kernel_tex_image_interp_3d when possible. Also could optimize for the + * common case where transform is translation/scale only. 
*/ + float3 P = sd->P; + object_inverse_position_transform(kg, sd, &P); InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp); return average(float4_to_float3(r)); } @@ -62,10 +66,11 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { - float3 P = volume_normalized_position(kg, sd, sd->P); + float3 P = sd->P; + object_inverse_position_transform(kg, sd, &P); InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp); if (r.w > 1e-6f && r.w != 1.0f) { /* For RGBA colors, unpremultiply after interpolation. */ diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index dfdd8843f29..b907c6a2bac 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -19,8 +19,8 @@ /* CPU Kernel Interface */ -#include "util/util_types.h" #include "kernel/kernel_types.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN @@ -38,7 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg); bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); -void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size); +void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size); #define KERNEL_ARCH cpu #include "kernel/kernels/cpu/kernel_cpu.h" diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 606c288649a..79ea03f4f6f 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -36,21 +36,18 @@ 
ccl_device_inline void bsdf_eval_init(BsdfEval *eval, eval->glossy = make_float3(0.0f, 0.0f, 0.0f); eval->transmission = make_float3(0.0f, 0.0f, 0.0f); eval->transparent = make_float3(0.0f, 0.0f, 0.0f); - eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); - eval->scatter = make_float3(0.0f, 0.0f, 0.0f); + eval->volume = make_float3(0.0f, 0.0f, 0.0f); if (type == CLOSURE_BSDF_TRANSPARENT_ID) eval->transparent = value; - else if (CLOSURE_IS_BSDF_DIFFUSE(type)) + else if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type)) eval->diffuse = value; else if (CLOSURE_IS_BSDF_GLOSSY(type)) eval->glossy = value; else if (CLOSURE_IS_BSDF_TRANSMISSION(type)) eval->transmission = value; - else if (CLOSURE_IS_BSDF_BSSRDF(type)) - eval->subsurface = value; else if (CLOSURE_IS_PHASE(type)) - eval->scatter = value; + eval->volume = value; } else #endif @@ -73,16 +70,14 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, value *= mis_weight; #ifdef __PASSES__ if (eval->use_light_pass) { - if (CLOSURE_IS_BSDF_DIFFUSE(type)) + if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type)) eval->diffuse += value; else if (CLOSURE_IS_BSDF_GLOSSY(type)) eval->glossy += value; else if (CLOSURE_IS_BSDF_TRANSMISSION(type)) eval->transmission += value; - else if (CLOSURE_IS_BSDF_BSSRDF(type)) - eval->subsurface += value; else if (CLOSURE_IS_PHASE(type)) - eval->scatter += value; + eval->volume += value; /* skipping transparent, this function is used by for eval(), will be zero then */ } @@ -98,7 +93,7 @@ ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval) #ifdef __PASSES__ if (eval->use_light_pass) { return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) && - is_zero(eval->transparent) && is_zero(eval->subsurface) && is_zero(eval->scatter); + is_zero(eval->transparent) && is_zero(eval->volume); } else #endif @@ -114,8 +109,7 @@ ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value) eval->diffuse *= value; eval->glossy *= 
value; eval->transmission *= value; - eval->subsurface *= value; - eval->scatter *= value; + eval->volume *= value; /* skipping transparent, this function is used by for eval(), will be zero then */ } @@ -144,8 +138,7 @@ ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value) eval->diffuse *= value; eval->glossy *= value; eval->transmission *= value; - eval->subsurface *= value; - eval->scatter *= value; + eval->volume *= value; /* skipping transparent, this function is used by for eval(), will be zero then */ } @@ -160,7 +153,7 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval) { #ifdef __PASSES__ if (eval->use_light_pass) { - return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter; + return eval->diffuse + eval->glossy + eval->transmission + eval->volume; } else #endif @@ -187,19 +180,16 @@ ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L) L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->color_glossy = make_float3(0.0f, 0.0f, 0.0f); L->color_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f); L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); + L->direct_volume = make_float3(0.0f, 0.0f, 0.0f); L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_volume = make_float3(0.0f, 0.0f, 0.0f); L->transparent = 0.0f; L->emission = make_float3(0.0f, 0.0f, 0.0f); @@ -211,8 +201,7 @@ ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L) L->state.diffuse = 
make_float3(0.0f, 0.0f, 0.0f); L->state.glossy = make_float3(0.0f, 0.0f, 0.0f); L->state.transmission = make_float3(0.0f, 0.0f, 0.0f); - L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->state.scatter = make_float3(0.0f, 0.0f, 0.0f); + L->state.volume = make_float3(0.0f, 0.0f, 0.0f); L->state.direct = make_float3(0.0f, 0.0f, 0.0f); } else @@ -264,11 +253,9 @@ ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg, L_state->diffuse = bsdf_eval->diffuse * value; L_state->glossy = bsdf_eval->glossy * value; L_state->transmission = bsdf_eval->transmission * value; - L_state->subsurface = bsdf_eval->subsurface * value; - L_state->scatter = bsdf_eval->scatter * value; + L_state->volume = bsdf_eval->volume * value; - *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + - L_state->subsurface + L_state->scatter; + *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + L_state->volume; L_state->direct = *throughput; } @@ -449,8 +436,7 @@ ccl_device_inline void path_radiance_accum_light(KernelGlobals *kg, L->direct_diffuse += shaded_throughput * bsdf_eval->diffuse; L->direct_glossy += shaded_throughput * bsdf_eval->glossy; L->direct_transmission += shaded_throughput * bsdf_eval->transmission; - L->direct_subsurface += shaded_throughput * bsdf_eval->subsurface; - L->direct_scatter += shaded_throughput * bsdf_eval->scatter; + L->direct_volume += shaded_throughput * bsdf_eval->volume; if (is_lamp) { L->shadow.x += shadow.x * shadow_fac; @@ -528,7 +514,8 @@ ccl_device_inline void path_radiance_accum_background(KernelGlobals *kg, } #ifdef __DENOISING_FEATURES__ - L->denoising_albedo += state->denoising_feature_weight * value; + L->denoising_albedo += state->denoising_feature_weight * state->denoising_feature_throughput * + value; #endif /* __DENOISING_FEATURES__ */ } @@ -561,15 +548,13 @@ ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L) L->direct_diffuse += L->state.diffuse * L->direct_emission; 
L->direct_glossy += L->state.glossy * L->direct_emission; L->direct_transmission += L->state.transmission * L->direct_emission; - L->direct_subsurface += L->state.subsurface * L->direct_emission; - L->direct_scatter += L->state.scatter * L->direct_emission; + L->direct_volume += L->state.volume * L->direct_emission; L->indirect = safe_divide_color(L->indirect, L->state.direct); L->indirect_diffuse += L->state.diffuse * L->indirect; L->indirect_glossy += L->state.glossy * L->indirect; L->indirect_transmission += L->state.transmission * L->indirect; - L->indirect_subsurface += L->state.subsurface * L->indirect; - L->indirect_scatter += L->state.scatter * L->indirect; + L->indirect_volume += L->state.volume * L->indirect; } #endif } @@ -581,8 +566,7 @@ ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L) L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f); L->state.glossy = make_float3(0.0f, 0.0f, 0.0f); L->state.transmission = make_float3(0.0f, 0.0f, 0.0f); - L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->state.scatter = make_float3(0.0f, 0.0f, 0.0f); + L->state.volume = make_float3(0.0f, 0.0f, 0.0f); L->direct_emission = make_float3(0.0f, 0.0f, 0.0f); L->indirect = make_float3(0.0f, 0.0f, 0.0f); @@ -646,10 +630,10 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, if (L->use_light_pass) { path_radiance_sum_indirect(L); - L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + - L->direct_subsurface + L->direct_scatter + L->emission; + L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_volume + + L->emission; L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + - L->indirect_subsurface + L->indirect_scatter; + L->indirect_volume; if (!kernel_data.background.transparent) L_direct += L->background; @@ -665,14 +649,12 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); 
L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); + L->direct_volume = make_float3(0.0f, 0.0f, 0.0f); L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_volume = make_float3(0.0f, 0.0f, 0.0f); L->emission = make_float3(0.0f, 0.0f, 0.0f); } @@ -714,7 +696,7 @@ ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, kernel_assert(L->use_light_pass); *clean = L->emission + L->background; - *noisy = L->direct_scatter + L->indirect_scatter; + *noisy = L->direct_volume + L->indirect_volume; # define ADD_COMPONENT(flag, component) \ if (kernel_data.film.denoising_flags & flag) \ @@ -728,8 +710,6 @@ ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy); ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission); ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission); - ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface); - ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface); # undef ADD_COMPONENT #else *noisy = L->emission; @@ -766,14 +746,12 @@ ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse); safe_float3_add(L->direct_glossy, L_sample->direct_glossy); safe_float3_add(L->direct_transmission, L_sample->direct_transmission); - safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface); - safe_float3_add(L->direct_scatter, L_sample->direct_scatter); + safe_float3_add(L->direct_volume, L_sample->direct_volume); 
safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse); safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy); safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission); - safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface); - safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter); + safe_float3_add(L->indirect_volume, L_sample->indirect_volume); safe_float3_add(L->background, L_sample->background); safe_float3_add(L->ao, L_sample->ao); diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h new file mode 100644 index 00000000000..047fe8c92ec --- /dev/null +++ b/intern/cycles/kernel/kernel_adaptive_sampling.h @@ -0,0 +1,230 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__ +#define __KERNEL_ADAPTIVE_SAMPLING_H__ + +CCL_NAMESPACE_BEGIN + +/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */ + +ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg, + ccl_global float *buffer, + int sample) +{ + /* TODO Stefan: Is this better in linear, sRGB or something else? 
*/ + float4 I = *((ccl_global float4 *)buffer); + float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer); + /* The per pixel error as seen in section 2.1 of + * "A hierarchical automatic stopping condition for Monte Carlo global illumination" + * A small epsilon is added to the divisor to prevent division by zero. */ + float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) / + (sample * 0.0001f + sqrtf(I.x + I.y + I.z)); + if (error < kernel_data.integrator.adaptive_threshold * (float)sample) { + /* Set the fourth component to non-zero value to indicate that this pixel has converged. */ + buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f; + } +} + +/* Adjust the values of an adaptively sampled pixel. */ + +ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg, + ccl_global float *buffer, + float sample_multiplier) +{ + *(ccl_global float4 *)(buffer) *= sample_multiplier; + + /* Scale the aux pass too, this is necessary for progressive rendering to work properly. */ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer); + *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier; + +#ifdef __PASSES__ + int flag = kernel_data.film.pass_flag; + + if (flag & PASSMASK(NORMAL)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier; + + if (flag & PASSMASK(UV)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier; + + if (flag & PASSMASK(MOTION)) { + *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier; + *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier; + } + + if (kernel_data.film.use_light_pass) { + int light_flag = kernel_data.film.light_pass_flag; + + if (light_flag & PASSMASK(MIST)) + *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier; + + /* Shadow pass omitted on purpose. 
It has its own scale parameter. */ + + if (light_flag & PASSMASK(DIFFUSE_INDIRECT)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier; + if (light_flag & PASSMASK(GLOSSY_INDIRECT)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier; + if (light_flag & PASSMASK(TRANSMISSION_INDIRECT)) + *(ccl_global float3 *)(buffer + + kernel_data.film.pass_transmission_indirect) *= sample_multiplier; + if (light_flag & PASSMASK(VOLUME_INDIRECT)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier; + if (light_flag & PASSMASK(DIFFUSE_DIRECT)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier; + if (light_flag & PASSMASK(GLOSSY_DIRECT)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier; + if (light_flag & PASSMASK(TRANSMISSION_DIRECT)) + *(ccl_global float3 *)(buffer + + kernel_data.film.pass_transmission_direct) *= sample_multiplier; + if (light_flag & PASSMASK(VOLUME_DIRECT)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier; + + if (light_flag & PASSMASK(EMISSION)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier; + if (light_flag & PASSMASK(BACKGROUND)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier; + if (light_flag & PASSMASK(AO)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier; + + if (light_flag & PASSMASK(DIFFUSE_COLOR)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier; + if (light_flag & PASSMASK(GLOSSY_COLOR)) + *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier; + if (light_flag & PASSMASK(TRANSMISSION_COLOR)) + *(ccl_global float3 *)(buffer + + kernel_data.film.pass_transmission_color) *= sample_multiplier; + } +#endif + 
+#ifdef __DENOISING_FEATURES__ + +# define scale_float3_variance(buffer, offset, scale) \ + *(buffer + offset) *= scale; \ + *(buffer + offset + 1) *= scale; \ + *(buffer + offset + 2) *= scale; \ + *(buffer + offset + 3) *= scale * scale; \ + *(buffer + offset + 4) *= scale * scale; \ + *(buffer + offset + 5) *= scale * scale; + +# define scale_shadow_variance(buffer, offset, scale) \ + *(buffer + offset) *= scale; \ + *(buffer + offset + 1) *= scale; \ + *(buffer + offset + 2) *= scale * scale; + + if (kernel_data.film.pass_denoising_data) { + scale_shadow_variance( + buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier); + scale_shadow_variance( + buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier); + if (kernel_data.film.pass_denoising_clean) { + scale_float3_variance( + buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier); + *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier; + *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier; + *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier; + } + else { + scale_float3_variance( + buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier); + } + scale_float3_variance( + buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier); + scale_float3_variance( + buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier); + *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier; + *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH + + 1) *= sample_multiplier * sample_multiplier; + } +#endif /* __DENOISING_FEATURES__ */ + + if (kernel_data.film.cryptomatte_passes) { + int num_slots = 0; + num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 
1 : 0; + num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0; + num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0; + num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth; + ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer + + kernel_data.film.pass_cryptomatte); + for (int slot = 0; slot < num_slots; slot++) { + id_buffer[slot].y *= sample_multiplier; + } + } +} + +/* This is a simple box filter in two passes. + * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */ + +ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile) +{ + bool any = false; + bool prev = false; + for (int x = tile->x; x < tile->x + tile->w; ++x) { + int index = tile->offset + x + y * tile->stride; + ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; + ccl_global float4 *aux = (ccl_global float4 *)(buffer + + kernel_data.film.pass_adaptive_aux_buffer); + if (aux->w == 0.0f) { + any = true; + if (x > tile->x && !prev) { + index = index - 1; + buffer = tile->buffer + index * kernel_data.film.pass_stride; + aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer); + aux->w = 0.0f; + } + prev = true; + } + else { + if (prev) { + aux->w = 0.0f; + } + prev = false; + } + } + return any; +} + +ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile) +{ + bool prev = false; + bool any = false; + for (int y = tile->y; y < tile->y + tile->h; ++y) { + int index = tile->offset + x + y * tile->stride; + ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; + ccl_global float4 *aux = (ccl_global float4 *)(buffer + + kernel_data.film.pass_adaptive_aux_buffer); + if (aux->w == 0.0f) { + any = true; + if (y > tile->y && !prev) { + index = index - tile->stride; + buffer = tile->buffer + index * kernel_data.film.pass_stride; + aux = (ccl_global 
float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer); + aux->w = 0.0f; + } + prev = true; + } + else { + if (prev) { + aux->w = 0.0f; + } + prev = false; + } + } + return any; +} + +CCL_NAMESPACE_END + +#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */ diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index a349b225abb..f1fc697553a 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -71,7 +71,7 @@ ccl_device_inline void compute_light_pass( # ifdef __SUBSURFACE__ /* sample subsurface scattering */ - if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { + if ((pass_filter & BAKE_FILTER_DIFFUSE) && (sd->flag & SD_BSSRDF)) { /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting * if scattering was successful. */ SubsurfaceIndirectRays ss_indirect; @@ -123,7 +123,7 @@ ccl_device_inline void compute_light_pass( # ifdef __SUBSURFACE__ /* sample subsurface scattering */ - if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { + if ((pass_filter & BAKE_FILTER_DIFFUSE) && (sd->flag & SD_BSSRDF)) { /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting * if scattering was successful. 
*/ kernel_branched_path_subsurface_scatter( @@ -178,10 +178,6 @@ ccl_device_inline float3 kernel_bake_shader_bsdf(KernelGlobals *kg, return shader_bsdf_glossy(kg, sd); case SHADER_EVAL_TRANSMISSION: return shader_bsdf_transmission(kg, sd); -# ifdef __SUBSURFACE__ - case SHADER_EVAL_SUBSURFACE: - return shader_bsdf_subsurface(kg, sd); -# endif default: kernel_assert(!"Unknown bake type passed to BSDF evaluate"); return make_float3(0.0f, 0.0f, 0.0f); @@ -385,11 +381,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, if ((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT) out += L.indirect_transmission; - if ((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT) - out += L.direct_subsurface; - if ((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT) - out += L.indirect_subsurface; - if ((pass_filter & BAKE_FILTER_EMISSION) != 0) out += L.emission; @@ -414,13 +405,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, kg, &sd, &state, L.direct_transmission, L.indirect_transmission, type, pass_filter); break; } - case SHADER_EVAL_SUBSURFACE: { -# ifdef __SUBSURFACE__ - out = kernel_bake_evaluate_direct_indirect( - kg, &sd, &state, L.direct_subsurface, L.indirect_subsurface, type, pass_filter); -# endif - break; - } # endif /* extra */ diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 006dd00dd73..88f6a264a5a 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -35,11 +35,11 @@ # define __NODES_FEATURES__ NODE_FEATURE_ALL #endif +#include "util/util_half.h" #include "util/util_math.h" #include "util/util_simd.h" -#include "util/util_half.h" -#include "util/util_types.h" #include "util/util_texture.h" +#include "util/util_types.h" #define ccl_addr_space diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 
4f508d7cdaa..3c5a10540d5 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -37,8 +37,11 @@ typedef unsigned long long uint64_t; typedef unsigned short half; typedef unsigned long long CUtexObject; -#define FLT_MIN 1.175494350822287507969e-38f -#define FLT_MAX 340282346638528859811704183484516925440.0f +#ifdef CYCLES_CUBIN_CC +# define FLT_MIN 1.175494350822287507969e-38f +# define FLT_MAX 340282346638528859811704183484516925440.0f +# define FLT_EPSILON 1.192092896e-07F +#endif __device__ half __float2half(const float f) { diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h index 61b9d87a020..7068acc3a32 100644 --- a/intern/cycles/kernel/kernel_compat_optix.h +++ b/intern/cycles/kernel/kernel_compat_optix.h @@ -35,9 +35,11 @@ typedef unsigned int uint32_t; typedef unsigned long long uint64_t; typedef unsigned short half; typedef unsigned long long CUtexObject; - -#define FLT_MIN 1.175494350822287507969e-38f -#define FLT_MAX 340282346638528859811704183484516925440.0f +#ifdef CYCLES_CUBIN_CC +# define FLT_MIN 1.175494350822287507969e-38f +# define FLT_MAX 340282346638528859811704183484516925440.0f +# define FLT_EPSILON 1.192092896e-07F +#endif __device__ half __float2half(const float f) { diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index c63d1149d03..71b176a0a8f 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -145,16 +145,14 @@ ccl_device_noinline_cpu bool direct_emission(KernelGlobals *kg, #ifdef __PASSES__ /* use visibility flag to skip lights */ if (ls->shader & SHADER_EXCLUDE_ANY) { - if (ls->shader & SHADER_EXCLUDE_DIFFUSE) { + if (ls->shader & SHADER_EXCLUDE_DIFFUSE) eval->diffuse = make_float3(0.0f, 0.0f, 0.0f); - eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); - } if (ls->shader & SHADER_EXCLUDE_GLOSSY) eval->glossy = make_float3(0.0f, 0.0f, 0.0f); if 
(ls->shader & SHADER_EXCLUDE_TRANSMIT) eval->transmission = make_float3(0.0f, 0.0f, 0.0f); if (ls->shader & SHADER_EXCLUDE_SCATTER) - eval->scatter = make_float3(0.0f, 0.0f, 0.0f); + eval->volume = make_float3(0.0f, 0.0f, 0.0f); } #endif diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index fc3a6152b79..3829426f261 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -40,15 +40,9 @@ ccl_device float4 film_get_pass_result(KernelGlobals *kg, if (display_divide_pass_stride != -1) { ccl_global float4 *divide_in = (ccl_global float4 *)(buffer + display_divide_pass_stride + index * kernel_data.film.pass_stride); - if (divide_in->x != 0.0f) { - pass_result.x /= divide_in->x; - } - if (divide_in->y != 0.0f) { - pass_result.y /= divide_in->y; - } - if (divide_in->z != 0.0f) { - pass_result.z /= divide_in->z; - } + float3 divided = safe_divide_even_color(float4_to_float3(pass_result), + float4_to_float3(*divide_in)); + pass_result = make_float4(divided.x, divided.y, divided.z, pass_result.w); } if (kernel_data.film.use_display_exposure) { diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index a440021b6b9..c186e8560eb 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -22,8 +22,8 @@ #include "kernel/kernel_profiling.h" #ifdef __KERNEL_CPU__ -# include "util/util_vector.h" # include "util/util_map.h" +# include "util/util_vector.h" #endif #ifdef __KERNEL_OPENCL__ diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index e59d8946950..5b6e3bbf501 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -195,4 +195,36 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy) } #endif +ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension) +{ + /* Fallback to random */ + if (sample >= 
NUM_PMJ_SAMPLES) { + int p = rng_hash + dimension; + return cmj_randfloat(sample, p); + } + uint tmp_rng = cmj_hash_simple(dimension, rng_hash); + int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2; + return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) - + 1.0f; +} + +ccl_device void pmj_sample_2D( + KernelGlobals *kg, int sample, int rng_hash, int dimension, float *fx, float *fy) +{ + if (sample >= NUM_PMJ_SAMPLES) { + int p = rng_hash + dimension; + *fx = cmj_randfloat(sample, p); + *fy = cmj_randfloat(sample, p + 1); + return; + } + uint tmp_rng = cmj_hash_simple(dimension, rng_hash); + int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2; + *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) - + 1.0f; + tmp_rng = cmj_hash_simple(dimension + 1, rng_hash); + *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ + (tmp_rng & 0x007fffff)) - + 1.0f; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 7345e9ee5bb..98136bc7047 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -29,7 +29,9 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, if (kernel_data.film.pass_denoising_data == 0) return; - buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; + buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ? 
+ DENOISING_PASS_SHADOW_B : + DENOISING_PASS_SHADOW_A; path_total = ensure_finite(path_total); path_total_shaded = ensure_finite(path_total_shaded); @@ -58,7 +60,8 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, } float3 normal = make_float3(0.0f, 0.0f, 0.0f); - float3 albedo = make_float3(0.0f, 0.0f, 0.0f); + float3 diffuse_albedo = make_float3(0.0f, 0.0f, 0.0f); + float3 specular_albedo = make_float3(0.0f, 0.0f, 0.0f); float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; for (int i = 0; i < sd->num_closure; i++) { @@ -70,24 +73,31 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ normal += sc->N * sc->sample_weight; sum_weight += sc->sample_weight; - if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { - float3 closure_albedo = sc->weight; - /* Closures that include a Fresnel term typically have weights close to 1 even though their - * actual contribution is significantly lower. - * To account for this, we scale their weight by the average fresnel factor (the same is also - * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ - if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; - closure_albedo *= bsdf->extra->fresnel_color; - } - else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { - PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc; - closure_albedo *= bsdf->avg_value; - } - albedo += closure_albedo; + float3 closure_albedo = sc->weight; + /* Closures that include a Fresnel term typically have weights close to 1 even though their + * actual contribution is significantly lower. + * To account for this, we scale their weight by the average fresnel factor (the same is also + * done for the sample weight in the BSDF setup, so we don't need to scale that here). 
*/ + if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; + closure_albedo *= bsdf->extra->fresnel_color; + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { + PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc; + closure_albedo *= bsdf->avg_value; + } + else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { + closure_albedo *= bsdf_principled_hair_albedo(sc); + } + + if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { + diffuse_albedo += closure_albedo; sum_nonspecular_weight += sc->sample_weight; } + else { + specular_albedo += closure_albedo; + } } /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ @@ -101,10 +111,14 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, normal = transform_direction(&worldtocamera, normal); L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal); - L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo); + L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * + state->denoising_feature_throughput * diffuse_albedo); state->denoising_feature_weight = 0.0f; } + else { + state->denoising_feature_throughput *= specular_albedo; + } } #endif /* __DENOISING_FEATURES__ */ @@ -240,8 +254,6 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput; if (light_flag & PASSMASK_COMPONENT(TRANSMISSION)) L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput; - if (light_flag & PASSMASK_COMPONENT(SUBSURFACE)) - L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput; if (light_flag & PASSMASK(MIST)) { /* bring depth into 0..1 range */ @@ -287,11 +299,8 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, if (light_flag & PASSMASK(TRANSMISSION_INDIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, 
L->indirect_transmission); - if (light_flag & PASSMASK(SUBSURFACE_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, - L->indirect_subsurface); if (light_flag & PASSMASK(VOLUME_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter); + kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_volume); if (light_flag & PASSMASK(DIFFUSE_DIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse); if (light_flag & PASSMASK(GLOSSY_DIRECT)) @@ -299,11 +308,8 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, if (light_flag & PASSMASK(TRANSMISSION_DIRECT)) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission); - if (light_flag & PASSMASK(SUBSURFACE_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, - L->direct_subsurface); if (light_flag & PASSMASK(VOLUME_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter); + kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_volume); if (light_flag & PASSMASK(EMISSION)) kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission); @@ -319,8 +325,6 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, if (light_flag & PASSMASK(TRANSMISSION_COLOR)) kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission); - if (light_flag & PASSMASK(SUBSURFACE_COLOR)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface); if (light_flag & PASSMASK(SHADOW)) { float4 shadow = L->shadow; shadow.w = kernel_data.film.pass_shadow_scale; @@ -387,6 +391,41 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg, #ifdef __KERNEL_DEBUG__ kernel_write_debug_passes(kg, buffer, L); #endif + + /* Adaptive 
Sampling. Fill the additional buffer with the odd samples and calculate our stopping + criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte + Carlo global illumination" except that here it is applied per pixel and not in hierarchical + tiles. */ + if (kernel_data.film.pass_adaptive_aux_buffer && + kernel_data.integrator.adaptive_threshold > 0.0f) { + if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { + kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer, + make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f)); + } +#ifdef __KERNEL_CPU__ + if (sample > kernel_data.integrator.adaptive_min_samples && + (sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (ADAPTIVE_SAMPLE_STEP - 1)) { + kernel_do_adaptive_stopping(kg, buffer, sample); + } +#endif + } + + /* Write the sample count as negative numbers initially to mark the samples as in progress. + * Once the tile has finished rendering, the sign gets flipped and all the pixel values + * are scaled as if they were taken at a uniform sample count. */ + if (kernel_data.film.pass_sample_count) { + /* Make sure it's a negative number. In progressive refine mode, this bit gets flipped between + * passes. 
*/ +#ifdef __ATOMIC_PASS_WRITE__ + atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count), + 0x80000000); +#else + if (buffer[kernel_data.film.pass_sample_count] > 0) { + buffer[kernel_data.film.pass_sample_count] *= -1.0f; + } +#endif + kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 1a0b67275a7..db35303e3f1 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -18,6 +18,7 @@ # include "kernel/osl/osl_shader.h" #endif +// clang-format off #include "kernel/kernel_random.h" #include "kernel/kernel_projection.h" #include "kernel/kernel_montecarlo.h" @@ -31,6 +32,7 @@ #include "kernel/kernel_accumulate.h" #include "kernel/kernel_shader.h" #include "kernel/kernel_light.h" +#include "kernel/kernel_adaptive_sampling.h" #include "kernel/kernel_passes.h" #if defined(__VOLUME__) || defined(__SUBSURFACE__) @@ -48,6 +50,7 @@ #include "kernel/kernel_path_surface.h" #include "kernel/kernel_path_volume.h" #include "kernel/kernel_path_subsurface.h" +// clang-format on CCL_NAMESPACE_BEGIN @@ -168,19 +171,19 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k Ray volume_ray = *ray; volume_ray.t = (hit) ? 
isect->t : FLT_MAX; - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + float step_size = volume_stack_step_size(kg, state->volume_stack); # ifdef __VOLUME_DECOUPLED__ int sampling_method = volume_stack_sampling_method(kg, state->volume_stack); bool direct = (state->flag & PATH_RAY_CAMERA) != 0; - bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method); + bool decoupled = kernel_volume_use_decoupled(kg, step_size, direct, sampling_method); if (decoupled) { /* cache steps along volume for repeated sampling */ VolumeSegment volume_segment; shader_setup_from_volume(kg, sd, &volume_ray); - kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous); + kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, step_size); volume_segment.sampling_method = sampling_method; @@ -226,7 +229,7 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *k { /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, sd, &volume_ray, L, throughput, heterogeneous); + kg, state, sd, &volume_ray, L, throughput, step_size); # ifdef __VOLUME_SCATTER__ if (result == VOLUME_PATH_SCATTERED) { @@ -656,6 +659,14 @@ ccl_device void kernel_path_trace( buffer += index * pass_stride; + if (kernel_data.film.pass_adaptive_aux_buffer) { + ccl_global float4 *aux = (ccl_global float4 *)(buffer + + kernel_data.film.pass_adaptive_aux_buffer); + if (aux->w > 0.0f) { + return; + } + } + /* Initialize random numbers and sample ray. 
*/ uint rng_hash; Ray ray; diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index f75e4ab4c97..337c4fb1d10 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -91,7 +91,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg, Ray volume_ray = *ray; volume_ray.t = (hit) ? isect->t : FLT_MAX; - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + float step_size = volume_stack_step_size(kg, state->volume_stack); # ifdef __VOLUME_DECOUPLED__ /* decoupled ray marching only supported on CPU */ @@ -100,7 +100,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg, VolumeSegment volume_segment; shader_setup_from_volume(kg, sd, &volume_ray); - kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous); + kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, step_size); /* direct light sampling */ if (volume_segment.closure_flag & SD_SCATTER) { @@ -171,7 +171,7 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg, path_state_branch(&ps, j, num_samples); VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, sd, &volume_ray, L, &tp, heterogeneous); + kg, &ps, sd, &volume_ray, L, &tp, step_size); # ifdef __VOLUME_SCATTER__ if (result == VOLUME_PATH_SCATTERED) { @@ -523,6 +523,14 @@ ccl_device void kernel_branched_path_trace( buffer += index * pass_stride; + if (kernel_data.film.pass_adaptive_aux_buffer) { + ccl_global float4 *aux = (ccl_global float4 *)(buffer + + kernel_data.film.pass_adaptive_aux_buffer); + if (aux->w > 0.0f) { + return; + } + } + /* initialize random numbers and ray */ uint rng_hash; Ray ray; diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index 8735e3208db..c389c815ae2 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ 
b/intern/cycles/kernel/kernel_path_state.h @@ -41,9 +41,11 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, if (kernel_data.film.pass_denoising_data) { state->flag |= PATH_RAY_STORE_SHADOW_INFO; state->denoising_feature_weight = 1.0f; + state->denoising_feature_throughput = make_float3(1.0f, 1.0f, 1.0f); } else { state->denoising_feature_weight = 0.0f; + state->denoising_feature_throughput = make_float3(0.0f, 0.0f, 0.0f); } #endif /* __DENOISING_FEATURES__ */ diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 80738213d2a..f4c3b36e778 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -43,7 +43,7 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension) uint i = index + SOBOL_SKIP; for (int j = 0, x; (x = find_first_set(i)); i >>= x) { j += x; - result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j - 1); + result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1); } return result; } @@ -56,7 +56,9 @@ ccl_device_forceinline float path_rng_1D( #ifdef __DEBUG_CORRELATION__ return (float)drand48(); #endif - + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) { + return pmj_sample_1D(kg, sample, rng_hash, dimension); + } #ifdef __CMJ__ # ifdef __SOBOL__ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) @@ -99,7 +101,10 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, *fy = (float)drand48(); return; #endif - + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) { + pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy); + return; + } #ifdef __CMJ__ # ifdef __SOBOL__ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) @@ -284,4 +289,31 @@ ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng) return (float)*rng * (1.0f / (float)0xFFFFFFFF); } +ccl_device_inline bool sample_is_even(int pattern, int sample) +{ + if (pattern == 
SAMPLING_PATTERN_PMJ) { + /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al. + * We can use this to get divide sample sequence into two classes for easier variance + * estimation. */ +#if defined(__GNUC__) && !defined(__KERNEL_GPU__) + return __builtin_popcount(sample & 0xaaaaaaaa) & 1; +#elif defined(__NVCC__) + return __popc(sample & 0xaaaaaaaa) & 1; +#elif defined(__KERNEL_OPENCL__) + return popcount(sample & 0xaaaaaaaa) & 1; +#else + /* TODO(Stefan): popcnt intrinsic for Windows with fallback for older CPUs. */ + int i = sample & 0xaaaaaaaa; + i = i - ((i >> 1) & 0x55555555); + i = (i & 0x33333333) + ((i >> 2) & 0x33333333); + i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; + return i & 1; +#endif + } + else { + /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */ + return sample & 0x1; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index d03faff4242..9700aaba80f 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -23,10 +23,12 @@ * Release. 
*/ +// clang-format off #include "kernel/closure/alloc.h" #include "kernel/closure/bsdf_util.h" #include "kernel/closure/bsdf.h" #include "kernel/closure/emissive.h" +// clang-format on #include "kernel/svm/svm.h" @@ -901,7 +903,8 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) for (int i = 0; i < sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type) || + CLOSURE_IS_BSDF_BSSRDF(sc->type)) eval += sc->weight; } @@ -936,20 +939,6 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) return eval; } -ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) -{ - float3 eval = make_float3(0.0f, 0.0f, 0.0f); - - for (int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) - eval += sc->weight; - } - - return eval; -} - ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd) { float3 N = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 23e30db1b08..ed8572467ea 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -428,12 +428,17 @@ ccl_device_noinline hit = (ss_isect->num_hits > 0); if (hit) { +#ifdef __KERNEL_OPTIX__ + /* t is always in world space with OptiX. */ + t = ss_isect->hits[0].t; +#else /* Compute world space distance to surface hit. */ float3 D = ray->D; object_inverse_dir_transform(kg, sd, &D); D = normalize(D) * ss_isect->hits[0].t; object_dir_transform(kg, sd, &D); t = len(D); +#endif } /* Advance to new scatter location. 
*/ diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 9eaa6b5516e..c8e01677d09 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -35,6 +35,7 @@ KERNEL_TEX(KernelObject, __objects) KERNEL_TEX(Transform, __object_motion_pass) KERNEL_TEX(DecomposedTransform, __object_motion) KERNEL_TEX(uint, __object_flag) +KERNEL_TEX(float, __object_volume_step) /* cameras */ KERNEL_TEX(DecomposedTransform, __camera_motion) @@ -77,7 +78,7 @@ KERNEL_TEX(KernelShader, __shaders) KERNEL_TEX(float, __lookup_table) /* sobol */ -KERNEL_TEX(uint, __sobol_directions) +KERNEL_TEX(uint, __sample_pattern_lut) /* image textures */ KERNEL_TEX(TextureInfo, __texture_info) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index c35e345763a..b6d319311a1 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -63,6 +63,11 @@ CCL_NAMESPACE_BEGIN #define VOLUME_STACK_SIZE 32 +/* Adaptive sampling constants */ +#define ADAPTIVE_SAMPLE_STEP 4 +static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0, + "ADAPTIVE_SAMPLE_STEP must be power of two for bitwise operations to work"); + /* Split kernel constants */ #define WORK_POOL_SIZE_GPU 64 #define WORK_POOL_SIZE_CPU 1 @@ -106,8 +111,6 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_AO_PREVIEW__ # define __SVM__ # define __EMISSION__ -# define __TEXTURES__ -# define __EXTRA_NODES__ # define __HOLDOUT__ # define __MULTI_CLOSURE__ # define __TRANSPARENT_SHADOWS__ @@ -220,7 +223,6 @@ typedef enum ShaderEvalType { SHADER_EVAL_DIFFUSE_COLOR, SHADER_EVAL_GLOSSY_COLOR, SHADER_EVAL_TRANSMISSION_COLOR, - SHADER_EVAL_SUBSURFACE_COLOR, SHADER_EVAL_EMISSION, SHADER_EVAL_AOV_COLOR, SHADER_EVAL_AOV_VALUE, @@ -232,7 +234,6 @@ typedef enum ShaderEvalType { SHADER_EVAL_DIFFUSE, SHADER_EVAL_GLOSSY, SHADER_EVAL_TRANSMISSION, - SHADER_EVAL_SUBSURFACE, /* extra */ SHADER_EVAL_ENVIRONMENT, @@ -269,6 
+270,7 @@ enum PathTraceDimension { enum SamplingPattern { SAMPLING_PATTERN_SOBOL = 0, SAMPLING_PATTERN_CMJ = 1, + SAMPLING_PATTERN_PMJ = 2, SAMPLING_NUM_PATTERNS, }; @@ -375,6 +377,8 @@ typedef enum PassType { PASS_CRYPTOMATTE, PASS_AOV_COLOR, PASS_AOV_VALUE, + PASS_ADAPTIVE_AUX_BUFFER, + PASS_SAMPLE_COUNT, PASS_CATEGORY_MAIN_END = 31, PASS_MIST = 32, @@ -392,10 +396,7 @@ typedef enum PassType { PASS_TRANSMISSION_DIRECT, PASS_TRANSMISSION_INDIRECT, PASS_TRANSMISSION_COLOR, - PASS_SUBSURFACE_DIRECT, - PASS_SUBSURFACE_INDIRECT, - PASS_SUBSURFACE_COLOR, - PASS_VOLUME_DIRECT, + PASS_VOLUME_DIRECT = 50, PASS_VOLUME_INDIRECT, /* No Scatter color since it's tricky to define what it would even mean. */ PASS_CATEGORY_LIGHT_END = 63, @@ -445,23 +446,20 @@ typedef enum eBakePassFilter { BAKE_FILTER_DIFFUSE = (1 << 3), BAKE_FILTER_GLOSSY = (1 << 4), BAKE_FILTER_TRANSMISSION = (1 << 5), - BAKE_FILTER_SUBSURFACE = (1 << 6), - BAKE_FILTER_EMISSION = (1 << 7), - BAKE_FILTER_AO = (1 << 8), + BAKE_FILTER_EMISSION = (1 << 6), + BAKE_FILTER_AO = (1 << 7), } eBakePassFilter; typedef enum BakePassFilterCombos { BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE | - BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_SUBSURFACE | - BAKE_FILTER_EMISSION | BAKE_FILTER_AO), + BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_EMISSION | + BAKE_FILTER_AO), BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE), BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY), BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION), - BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE), BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE), BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY), BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION), - BAKE_FILTER_SUBSURFACE_INDIRECT = 
(BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE), } BakePassFilterCombos; typedef enum DenoiseFlag { @@ -471,9 +469,7 @@ typedef enum DenoiseFlag { DENOISING_CLEAN_GLOSSY_IND = (1 << 3), DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4), DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5), - DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6), - DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7), - DENOISING_CLEAN_ALL_PASSES = (1 << 8) - 1, + DENOISING_CLEAN_ALL_PASSES = (1 << 6) - 1, } DenoiseFlag; #ifdef __KERNEL_DEBUG__ @@ -493,8 +489,7 @@ typedef ccl_addr_space struct PathRadianceState { float3 diffuse; float3 glossy; float3 transmission; - float3 subsurface; - float3 scatter; + float3 volume; float3 direct; #endif @@ -517,19 +512,16 @@ typedef ccl_addr_space struct PathRadiance { float3 color_diffuse; float3 color_glossy; float3 color_transmission; - float3 color_subsurface; float3 direct_diffuse; float3 direct_glossy; float3 direct_transmission; - float3 direct_subsurface; - float3 direct_scatter; + float3 direct_volume; float3 indirect_diffuse; float3 indirect_glossy; float3 indirect_transmission; - float3 indirect_subsurface; - float3 indirect_scatter; + float3 indirect_volume; float4 shadow; float mist; @@ -583,8 +575,7 @@ typedef struct BsdfEval { float3 glossy; float3 transmission; float3 transparent; - float3 subsurface; - float3 scatter; + float3 volume; #endif #ifdef __SHADOW_TRICKS__ float3 sum_no_mis; @@ -725,8 +716,7 @@ typedef enum PrimitiveType { /* Attributes */ typedef enum AttributePrimitive { - ATTR_PRIM_TRIANGLE = 0, - ATTR_PRIM_CURVE, + ATTR_PRIM_GEOMETRY = 0, ATTR_PRIM_SUBD, ATTR_PRIM_TYPES @@ -754,6 +744,7 @@ typedef enum AttributeStandard { ATTR_STD_UV, ATTR_STD_UV_TANGENT, ATTR_STD_UV_TANGENT_SIGN, + ATTR_STD_VERTEX_COLOR, ATTR_STD_GENERATED, ATTR_STD_GENERATED_TRANSFORM, ATTR_STD_POSITION_UNDEFORMED, @@ -894,13 +885,13 @@ enum ShaderDataFlag { SD_HAS_DISPLACEMENT = (1 << 26), /* Has constant emission (value stored in __shaders) */ SD_HAS_CONSTANT_EMISSION = (1 << 27), 
- /* Needs to access attributes */ - SD_NEED_ATTRIBUTES = (1 << 28), + /* Needs to access attributes for volume rendering */ + SD_NEED_VOLUME_ATTRIBUTES = (1 << 28), SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME | SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR | SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT | - SD_HAS_CONSTANT_EMISSION | SD_NEED_ATTRIBUTES) + SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES) }; /* Object flags. */ @@ -1057,6 +1048,7 @@ typedef struct PathState { #ifdef __DENOISING_FEATURES__ float denoising_feature_weight; + float3 denoising_feature_throughput; #endif /* __DENOISING_FEATURES__ */ /* multiple importance sampling */ @@ -1213,18 +1205,15 @@ typedef struct KernelFilm { int pass_diffuse_color; int pass_glossy_color; int pass_transmission_color; - int pass_subsurface_color; int pass_diffuse_indirect; int pass_glossy_indirect; int pass_transmission_indirect; - int pass_subsurface_indirect; int pass_volume_indirect; int pass_diffuse_direct; int pass_glossy_direct; int pass_transmission_direct; - int pass_subsurface_direct; int pass_volume_direct; int pass_emission; @@ -1239,6 +1228,9 @@ typedef struct KernelFilm { int cryptomatte_depth; int pass_cryptomatte; + int pass_adaptive_aux_buffer; + int pass_sample_count; + int pass_mist; float mist_start; float mist_inv_depth; @@ -1251,7 +1243,6 @@ typedef struct KernelFilm { int pass_aov_color; int pass_aov_value; int pad1; - int pad2; /* XYZ to rendering color space transform. float4 instead of float3 to * ensure consistent padding/alignment across devices. 
*/ @@ -1273,6 +1264,8 @@ typedef struct KernelFilm { int display_divide_pass_stride; int use_display_exposure; int use_display_pass_alpha; + + int pad3, pad4, pad5; } KernelFilm; static_assert_align(KernelFilm, 16); @@ -1280,6 +1273,7 @@ typedef struct KernelBackground { /* only shader index */ int surface_shader; int volume_shader; + float volume_step_size; int transparent; float transparent_roughness_squared_threshold; @@ -1287,7 +1281,6 @@ typedef struct KernelBackground { float ao_factor; float ao_distance; float ao_bounces_factor; - float ao_pad; } KernelBackground; static_assert_align(KernelBackground, 16); @@ -1354,18 +1347,20 @@ typedef struct KernelIntegrator { /* sampler */ int sampling_pattern; int aa_samples; + int adaptive_min_samples; + float adaptive_threshold; /* volume render */ int use_volumes; int volume_max_steps; - float volume_step_size; + float volume_step_rate; int volume_samples; int start_sample; int max_closures; - int pad1; + int pad1, pad2, pad3; } KernelIntegrator; static_assert_align(KernelIntegrator, 16); @@ -1679,12 +1674,16 @@ typedef struct WorkTile { uint start_sample; uint num_samples; - uint offset; + int offset; uint stride; ccl_global float *buffer; } WorkTile; +/* Precoumputed sample table sizes for PMJ02 sampler. 
*/ +#define NUM_PMJ_SAMPLES 64 * 64 +#define NUM_PMJ_PATTERNS 48 + CCL_NAMESPACE_END #endif /* __KERNEL_TYPES_H__ */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index f443bb88463..b4f9d2186f4 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -48,7 +48,8 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW); if (sd->flag & SD_EXTINCTION) { - *extinction = sd->closure_transparent_extinction; + const float density = object_volume_density(kg, sd->object); + *extinction = sd->closure_transparent_extinction * density; return true; } else { @@ -84,6 +85,11 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, } } + const float density = object_volume_density(kg, sd->object); + coeff->sigma_s *= density; + coeff->sigma_t *= density; + coeff->emission *= density; + return true; } @@ -101,15 +107,19 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel) #ifdef __VOLUME__ -ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack) +ccl_device float volume_stack_step_size(KernelGlobals *kg, ccl_addr_space VolumeStack *stack) { + float step_size = FLT_MAX; + for (int i = 0; stack[i].shader != SHADER_NONE; i++) { int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; + bool heterogeneous = false; + if (shader_flag & SD_HETEROGENEOUS_VOLUME) { - return true; + heterogeneous = true; } - else if (shader_flag & SD_NEED_ATTRIBUTES) { + else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) { /* We want to render world or objects without any volume grids * as homogeneous, but can only verify this at run-time since other * heterogeneous volume objects may be using the same shader. 
*/ @@ -117,13 +127,19 @@ ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space if (object != OBJECT_NONE) { int object_flag = kernel_tex_fetch(__object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { - return true; + heterogeneous = true; } } } + + if (heterogeneous) { + float object_step_size = object_volume_step_size(kg, stack[i].object); + object_step_size *= kernel_data.integrator.volume_step_rate; + step_size = fminf(object_step_size, step_size); + } } - return false; + return step_size; } ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack) @@ -158,12 +174,13 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg, ccl_addr_space PathState *state, + const float object_step_size, float t, float *step_size, float *step_offset) { const int max_steps = kernel_data.integrator.volume_max_steps; - float step = min(kernel_data.integrator.volume_step_size, t); + float step = min(object_step_size, t); /* compute exact steps in advance for malloc */ if (t > max_steps * step) { @@ -199,7 +216,8 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, ccl_addr_space PathState *state, Ray *ray, ShaderData *sd, - float3 *throughput) + float3 *throughput, + const float object_step_size) { float3 tp = *throughput; const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ @@ -207,7 +225,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, /* prepare for stepping */ int max_steps = kernel_data.integrator.volume_max_steps; float step_offset, step_size; - kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); + kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset); /* compute extinction at the start */ float t = 0.0f; @@ -264,8 +282,9 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, { 
shader_setup_from_volume(kg, shadow_sd, ray); - if (volume_stack_is_heterogeneous(kg, state->volume_stack)) - kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput); + float step_size = volume_stack_step_size(kg, state->volume_stack); + if (step_size != FLT_MAX) + kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput, step_size); else kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput); } @@ -533,7 +552,8 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, Ray *ray, ShaderData *sd, PathRadiance *L, - ccl_addr_space float3 *throughput) + ccl_addr_space float3 *throughput, + const float object_step_size) { float3 tp = *throughput; const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ @@ -541,7 +561,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, /* prepare for stepping */ int max_steps = kernel_data.integrator.volume_max_steps; float step_offset, step_size; - kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); + kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset); /* compute coefficients at the start */ float t = 0.0f; @@ -679,12 +699,13 @@ kernel_volume_integrate(KernelGlobals *kg, Ray *ray, PathRadiance *L, ccl_addr_space float3 *throughput, - bool heterogeneous) + float step_size) { shader_setup_from_volume(kg, sd, ray); - if (heterogeneous) - return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput); + if (step_size != FLT_MAX) + return kernel_volume_integrate_heterogeneous_distance( + kg, state, ray, sd, L, throughput, step_size); else return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true); } @@ -735,7 +756,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, Ray *ray, ShaderData *sd, VolumeSegment *segment, - bool heterogeneous) + const float object_step_size) { const float tp_eps = 1e-6f; /* todo: this is likely 
not the right value */ @@ -743,9 +764,9 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, int max_steps; float step_size, step_offset; - if (heterogeneous) { + if (object_step_size != FLT_MAX) { max_steps = kernel_data.integrator.volume_max_steps; - kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); + kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset); # ifdef __KERNEL_CPU__ /* NOTE: For the branched path tracing it's possible to have direct diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h index 799561a7466..c642d227e4b 100644 --- a/intern/cycles/kernel/kernel_work_stealing.h +++ b/intern/cycles/kernel/kernel_work_stealing.h @@ -23,17 +23,41 @@ CCL_NAMESPACE_BEGIN * Utility functions for work stealing */ +/* Map global work index to tile, pixel X/Y and sample. */ +ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, + uint global_work_index, + ccl_private uint *x, + ccl_private uint *y, + ccl_private uint *sample) +{ +#ifdef __KERNEL_CUDA__ + /* Keeping threads for the same pixel together improves performance on CUDA. 
*/ + uint sample_offset = global_work_index % tile->num_samples; + uint pixel_offset = global_work_index / tile->num_samples; +#else /* __KERNEL_CUDA__ */ + uint tile_pixels = tile->w * tile->h; + uint sample_offset = global_work_index / tile_pixels; + uint pixel_offset = global_work_index - sample_offset * tile_pixels; +#endif /* __KERNEL_CUDA__ */ + uint y_offset = pixel_offset / tile->w; + uint x_offset = pixel_offset - y_offset * tile->w; + + *x = tile->x + x_offset; + *y = tile->y + y_offset; + *sample = tile->start_sample + sample_offset; +} + #ifdef __KERNEL_OPENCL__ # pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #endif #ifdef __SPLIT_KERNEL__ /* Returns true if there is work */ -ccl_device bool get_next_work(KernelGlobals *kg, - ccl_global uint *work_pools, - uint total_work_size, - uint ray_index, - ccl_private uint *global_work_index) +ccl_device bool get_next_work_item(KernelGlobals *kg, + ccl_global uint *work_pools, + uint total_work_size, + uint ray_index, + ccl_private uint *global_work_index) { /* With a small amount of work there may be more threads than work due to * rounding up of global size, stop such threads immediately. */ @@ -56,31 +80,37 @@ ccl_device bool get_next_work(KernelGlobals *kg, /* Test if all work for this pool is done. */ return (*global_work_index < total_work_size); } -#endif -/* Map global work index to tile, pixel X/Y and sample. */ -ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, - uint global_work_index, - ccl_private uint *x, - ccl_private uint *y, - ccl_private uint *sample) +ccl_device bool get_next_work(KernelGlobals *kg, + ccl_global uint *work_pools, + uint total_work_size, + uint ray_index, + ccl_private uint *global_work_index) { -#ifdef __KERNEL_CUDA__ - /* Keeping threads for the same pixel together improves performance on CUDA. 
*/ - uint sample_offset = global_work_index % tile->num_samples; - uint pixel_offset = global_work_index / tile->num_samples; -#else /* __KERNEL_CUDA__ */ - uint tile_pixels = tile->w * tile->h; - uint sample_offset = global_work_index / tile_pixels; - uint pixel_offset = global_work_index - sample_offset * tile_pixels; -#endif /* __KERNEL_CUDA__ */ - uint y_offset = pixel_offset / tile->w; - uint x_offset = pixel_offset - y_offset * tile->w; - - *x = tile->x + x_offset; - *y = tile->y + y_offset; - *sample = tile->start_sample + sample_offset; + bool got_work = false; + if (kernel_data.film.pass_adaptive_aux_buffer) { + do { + got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index); + if (got_work) { + ccl_global WorkTile *tile = &kernel_split_params.tile; + uint x, y, sample; + get_work_pixel(tile, *global_work_index, &x, &y, &sample); + uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + ccl_global float4 *aux = (ccl_global float4 *)(buffer + + kernel_data.film.pass_adaptive_aux_buffer); + if (aux->w == 0.0f) { + break; + } + } + } while (got_work); + } + else { + got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index); + } + return got_work; } +#endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index f2146302a27..8829a14ead5 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -72,7 +72,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s assert(0); } -void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) +void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) { if (0) { } diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 
b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index f5d981fb71a..683f4b88d79 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -89,5 +89,9 @@ DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive) DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) +DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) +DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) +DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) +DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) #undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 8f311baf010..f87501db258 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -474,7 +474,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); - switch (kernel_tex_type(id)) { + switch (info.data_type) { case IMAGE_DATA_TYPE_HALF: return TextureInterpolator<half>::interp(info, x, y); case IMAGE_DATA_TYPE_BYTE: @@ -498,28 +498,34 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl } } -ccl_device float4 kernel_tex_image_interp_3d( - KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, + int id, + float3 P, + InterpolationType interp) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); - switch (kernel_tex_type(id)) { + if (info.use_transform_3d) { + P = transform_point(&info.transform_3d, P); + } + + switch (info.data_type) { case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_BYTE: 
- return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp); case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); + return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp); default: assert(0); return make_float4( diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 9ca3f46b5b6..091e53cfd83 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -20,6 +20,7 @@ * simply includes this file without worry of copying actual implementation over. 
*/ +// clang-format off #include "kernel/kernel_compat_cpu.h" #ifndef KERNEL_STUB @@ -58,6 +59,10 @@ # include "kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" # include "kernel/split/kernel_buffer_update.h" +# include "kernel/split/kernel_adaptive_stopping.h" +# include "kernel/split/kernel_adaptive_filter_x.h" +# include "kernel/split/kernel_adaptive_filter_y.h" +# include "kernel/split/kernel_adaptive_adjust_samples.h" # endif /* __SPLIT_KERNEL__ */ #else # define STUB_ASSERT(arch, name) \ @@ -67,6 +72,7 @@ # include "kernel/split/kernel_data_init.h" # endif /* __SPLIT_KERNEL__ */ #endif /* KERNEL_STUB */ +// clang-format on CCL_NAMESPACE_BEGIN @@ -204,6 +210,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) #endif /* __SPLIT_KERNEL__ */ #undef KERNEL_STUB diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index fbb773533ce..22fd5ea5634 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -57,9 +57,9 @@ kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int st if (num_inputs > 0) { float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float); float *out = rgb + (x + y * sw) * 3; - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; + out[0] = clamp(in[0], 0.0f, 10000.0f); + out[1] = clamp(in[1], 0.0f, 10000.0f); + out[2] = clamp(in[2], 0.0f, 10000.0f); } if (num_inputs > 1) { float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float); diff --git 
a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index af311027f78..c4c810c6a82 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -33,6 +33,7 @@ #include "kernel/kernel_path_branched.h" #include "kernel/kernel_bake.h" #include "kernel/kernel_work_stealing.h" +#include "kernel/kernel_adaptive_sampling.h" /* kernels */ extern "C" __global__ void @@ -83,6 +84,75 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size) extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size) +{ + int work_index = ccl_global_id(0); + bool thread_is_active = work_index < total_work_size; + KernelGlobals kg; + if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) { + uint x = tile->x + work_index % tile->w; + uint y = tile->y + work_index / tile->w; + int index = tile->offset + x + y * tile->stride; + ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; + kernel_do_adaptive_stopping(&kg, buffer, sample); + } +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint) +{ + KernelGlobals kg; + if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) { + if(ccl_global_id(0) < tile->h) { + int y = tile->y + ccl_global_id(0); + kernel_do_adaptive_filter_x(&kg, y, tile); + } + } +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint) +{ + KernelGlobals kg; + if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) { + if(ccl_global_id(0) < tile->w) { + int x = tile->x + ccl_global_id(0); + 
kernel_do_adaptive_filter_y(&kg, x, tile); + } + } +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size) +{ + if(kernel_data.film.pass_adaptive_aux_buffer) { + int work_index = ccl_global_id(0); + bool thread_is_active = work_index < total_work_size; + KernelGlobals kg; + if(thread_is_active) { + uint x = tile->x + work_index % tile->w; + uint y = tile->y + work_index / tile->w; + int index = tile->offset + x + y * tile->stride; + ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; + if(buffer[kernel_data.film.pass_sample_count] < 0.0f) { + buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; + float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]); + if(sample_multiplier != 1.0f) { + kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier); + } + } + else { + kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f)); + } + } + } +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 7c68f08ea10..1d425d132a1 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -124,7 +124,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl CUtexObject tex = (CUtexObject)info.data; /* float4, byte4, ushort4 and half4 */ - const int texture_type = kernel_tex_type(id); + const int texture_type = info.data_type; if (texture_type == 
IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { if (info.interpolation == INTERPOLATION_CUBIC) { @@ -149,14 +149,25 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl } } -ccl_device float4 kernel_tex_image_interp_3d( - KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, + int id, + float3 P, + InterpolationType interp) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + if (info.use_transform_3d) { + P = transform_point(&info.transform_3d, P); + } + + const float x = P.x; + const float y = P.y; + const float z = P.z; + CUtexObject tex = (CUtexObject)info.data; uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp; - const int texture_type = kernel_tex_type(id); + const int texture_type = info.data_type; if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { if (interpolation == INTERPOLATION_CUBIC) { diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 43b3d0aa0e6..95ad7599cf1 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -43,6 +43,10 @@ #include "kernel/split/kernel_next_iteration_setup.h" #include "kernel/split/kernel_indirect_subsurface.h" #include "kernel/split/kernel_buffer_update.h" +#include "kernel/split/kernel_adaptive_stopping.h" +#include "kernel/split/kernel_adaptive_filter_x.h" +#include "kernel/split/kernel_adaptive_filter_y.h" +#include "kernel/split/kernel_adaptive_adjust_samples.h" #include "kernel/kernel_film.h" @@ -121,6 +125,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) 
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) +DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl new file mode 100644 index 00000000000..ebdb99d4730 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl @@ -0,0 +1,23 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_adaptive_adjust_samples.h" + +#define KERNEL_NAME adaptive_adjust_samples +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl new file mode 100644 index 00000000000..76d82d4184e --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl @@ -0,0 +1,23 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_adaptive_filter_x.h" + +#define KERNEL_NAME adaptive_filter_x +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl new file mode 100644 index 00000000000..1e6d15ba0f2 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl @@ -0,0 +1,23 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_adaptive_filter_y.h" + +#define KERNEL_NAME adaptive_filter_y +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl new file mode 100644 index 00000000000..51de0059667 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl @@ -0,0 +1,23 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_adaptive_stopping.h" + +#define KERNEL_NAME adaptive_stopping +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index b6390679331..89fcb0ae60f 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -47,7 +47,7 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) { - const int texture_type = kernel_tex_type(id); + const int texture_type = info->data_type; /* Float4 */ if (texture_type == IMAGE_DATA_TYPE_FLOAT4) { @@ -202,11 +202,19 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl } } -ccl_device float4 -kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp) +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P, int interp) { const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + if (info->use_transform_3d) { + Transform tfm = info->transform_3d; + P = transform_point(&tfm, P); + } + + const float x = P.x; + const float y = P.y; + const float z = P.z; + if (info->extension == EXTENSION_CLIP) { if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl index 6041f13b52b..c3b7b09460a 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl @@ -28,3 +28,7 @@ #include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" #include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" #include 
"kernel/kernels/opencl/kernel_buffer_update.cl" +#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl" +#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl" +#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl" +#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl" diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 5be5bd181ec..fc0c845fd4f 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -33,6 +33,9 @@ set(LIB ${LLVM_LIBRARY} ) +# OSL and LLVM are built without RTTI +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") + include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp index b395227845d..3f9de5ab33d 100644 --- a/intern/cycles/kernel/osl/background.cpp +++ b/intern/cycles/kernel/osl/background.cpp @@ -36,9 +36,11 @@ #include "kernel/osl/osl_closures.h" +// clang-format off #include "kernel/kernel_compat_cpu.h" #include "kernel/closure/alloc.h" #include "kernel/closure/emissive.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp index c5edc7c9be3..76a2e41abfa 100644 --- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp @@ -37,10 +37,12 @@ #include "kernel/kernel_compat_cpu.h" #include "kernel/osl/osl_closures.h" +// clang-format off #include "kernel/kernel_types.h" #include "kernel/kernel_montecarlo.h" #include "kernel/closure/alloc.h" #include "kernel/closure/bsdf_diffuse_ramp.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp index 4b7e59ff932..b78dc8a3a67 100644 --- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp 
@@ -37,9 +37,11 @@ #include "kernel/kernel_compat_cpu.h" #include "kernel/osl/osl_closures.h" +// clang-format off #include "kernel/kernel_types.h" #include "kernel/closure/alloc.h" #include "kernel/closure/bsdf_phong_ramp.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp index c29ddb13e2e..d656723bac2 100644 --- a/intern/cycles/kernel/osl/emissive.cpp +++ b/intern/cycles/kernel/osl/emissive.cpp @@ -36,10 +36,12 @@ #include "kernel/osl/osl_closures.h" +// clang-format off #include "kernel/kernel_compat_cpu.h" #include "kernel/kernel_types.h" #include "kernel/closure/alloc.h" #include "kernel/closure/emissive.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index dd52c33071c..c5ca8616fbd 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -35,6 +35,7 @@ #include "kernel/kernel_compat_cpu.h" #include "kernel/osl/osl_closures.h" +// clang-format off #include "kernel/kernel_types.h" #include "kernel/kernel_montecarlo.h" @@ -43,6 +44,7 @@ #include "kernel/closure/bsdf_diffuse.h" #include "kernel/closure/bsdf_principled_diffuse.h" #include "kernel/closure/bssrdf.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 463a65f21a0..ea5e00ec23c 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -39,6 +39,7 @@ #include "util/util_math.h" #include "util/util_param.h" +// clang-format off #include "kernel/kernel_types.h" #include "kernel/kernel_compat_cpu.h" #include "kernel/split/kernel_split_data_types.h" @@ -63,6 +64,7 @@ #include "kernel/closure/bsdf_principled_diffuse.h" #include "kernel/closure/bsdf_principled_sheen.h" #include "kernel/closure/volume.h" +// clang-format on CCL_NAMESPACE_BEGIN diff --git 
a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index d3db6b71f5c..d12afdb80dd 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -33,12 +33,12 @@ #ifndef __OSL_CLOSURES_H__ #define __OSL_CLOSURES_H__ -#include "util/util_types.h" #include "kernel/kernel_types.h" +#include "util/util_types.h" +#include <OSL/genclosure.h> #include <OSL/oslclosure.h> #include <OSL/oslexec.h> -#include <OSL/genclosure.h> CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h index 0e6c8d21534..c06c9abd4c1 100644 --- a/intern/cycles/kernel/osl/osl_globals.h +++ b/intern/cycles/kernel/osl/osl_globals.h @@ -27,8 +27,8 @@ # include "util/util_map.h" # include "util/util_param.h" # include "util/util_thread.h" -# include "util/util_vector.h" # include "util/util_unique_ptr.h" +# include "util/util_vector.h" # ifndef WIN32 using std::isfinite; diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 767bd7702ae..2857de533f3 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -39,6 +39,7 @@ #include "util/util_logging.h" #include "util/util_string.h" +// clang-format off #include "kernel/kernel_compat_cpu.h" #include "kernel/split/kernel_split_data_types.h" #include "kernel/kernel_globals.h" @@ -56,6 +57,7 @@ #include "kernel/kernel_projection.h" #include "kernel/kernel_accumulate.h" #include "kernel/kernel_shader.h" +// clang-format on CCL_NAMESPACE_BEGIN @@ -1220,8 +1222,8 @@ bool OSLRenderServices::texture3d(ustring filename, ShaderData *sd = (ShaderData *)(sg->renderstate); KernelGlobals *kernel_globals = sd->osl_globals; int slot = handle->svm_slot; - float4 rgba = kernel_tex_image_interp_3d( - kernel_globals, slot, P.x, P.y, P.z, INTERPOLATION_NONE); + float3 P_float3 = make_float3(P.x, P.y, P.z); + float4 rgba = 
kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); result[0] = rgba[0]; if (nchannels > 1) diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 469c5188730..d32dace23bf 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -25,8 +25,8 @@ * attributes. */ -#include <OSL/oslexec.h> #include <OSL/oslclosure.h> +#include <OSL/oslexec.h> #ifdef WITH_PTEX class PtexCache; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index db5ad06d3fc..2318813949e 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -16,6 +16,7 @@ #include <OSL/oslexec.h> +// clang-format off #include "kernel/kernel_compat_cpu.h" #include "kernel/kernel_montecarlo.h" #include "kernel/kernel_types.h" @@ -28,6 +29,7 @@ #include "kernel/osl/osl_globals.h" #include "kernel/osl/osl_services.h" #include "kernel/osl/osl_shader.h" +// clang-format on #include "util/util_foreach.h" @@ -382,10 +384,6 @@ int OSLShader::find_attribute(KernelGlobals *kg, { /* for OSL, a hash map is used to lookup the attribute by name. 
*/ int object = sd->object * ATTR_PRIM_TYPES; -#ifdef __HAIR__ - if (sd->type & PRIMITIVE_ALL_CURVE) - object += ATTR_PRIM_CURVE; -#endif OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; ustring stdname(std::string("geom:") + diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index f4258da70d3..9dcedc9ba19 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -78,6 +78,7 @@ set(SRC_OSL node_value.osl node_vector_curves.osl node_vector_math.osl + node_vector_rotate.osl node_vector_transform.osl node_velvet_bsdf.osl node_vertex_color.osl @@ -95,16 +96,19 @@ set(SRC_OSL node_rgb_to_bw.osl ) +# The headers that OSL ships differs per release so we can not +# hardcode this. +file(GLOB SRC_OSL_HEADER_DIST ${OSL_SHADER_DIR}/*.h) + set(SRC_OSL_HEADERS node_color.h node_fresnel.h node_hash.h + node_math.h node_noise.h node_ramp_util.h - stdosl.h - oslutil.h - vector2.h - vector4.h + stdcycles.h + ${SRC_OSL_HEADER_DIST} ) set(SRC_OSO @@ -119,7 +123,7 @@ foreach(_file ${SRC_OSL}) string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE}) add_custom_command( OUTPUT ${_OSO_FILE} - COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE} + COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -I"${OSL_SHADER_DIR}" -o ${_OSO_FILE} ${_OSL_FILE} DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER}) list(APPEND SRC_OSO ${_OSO_FILE} diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl index e99bd254666..37ccc4c969f 100644 --- a/intern/cycles/kernel/shaders/node_absorption_volume.osl +++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_absorption_volume(color Color = color(0.8, 0.8, 0.8), float Density = 1.0, diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl index 077e2735e61..27ecc9ef0c2 100644 --- a/intern/cycles/kernel/shaders/node_add_closure.osl +++ b/intern/cycles/kernel/shaders/node_add_closure.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_add_closure(closure color Closure1 = 0, closure color Closure2 = 0, diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl index 7bf28719e78..22d245d0698 100644 --- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl +++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_ambient_occlusion(color ColorIn = color(1.0, 1.0, 1.0), int samples = 16, diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl index 165c09eb8e0..739cd375ab2 100644 --- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl @@ -13,8 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include "stdosl.h" +#include "stdcycles.h" shader node_anisotropic_bsdf(color Color = 0.0, string distribution = "GGX", diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl index 336543cc130..abec8ebfbf0 100644 --- a/intern/cycles/kernel/shaders/node_attribute.osl +++ b/intern/cycles/kernel/shaders/node_attribute.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_attribute(string bump_offset = "center", string name = "", diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl index 6ded0d2c65c..3f45db751b3 100644 --- a/intern/cycles/kernel/shaders/node_background.osl +++ b/intern/cycles/kernel/shaders/node_background.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_background(color Color = 0.8, float Strength = 1.0, diff --git a/intern/cycles/kernel/shaders/node_bevel.osl b/intern/cycles/kernel/shaders/node_bevel.osl index 189c20c52e7..e87ddab716d 100644 --- a/intern/cycles/kernel/shaders/node_bevel.osl +++ b/intern/cycles/kernel/shaders/node_bevel.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_bevel(int samples = 4, float Radius = 0.05, diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl index 8a24bf1e28b..741efae755d 100644 --- a/intern/cycles/kernel/shaders/node_blackbody.osl +++ b/intern/cycles/kernel/shaders/node_blackbody.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_blackbody(float Temperature = 1200.0, output color Color = 0.0) { diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl index 30644ef2ff3..075a324c730 100644 --- a/intern/cycles/kernel/shaders/node_brick_texture.osl +++ b/intern/cycles/kernel/shaders/node_brick_texture.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" /* Brick */ diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl index 2defbc4b1db..019edfb79a3 100644 --- a/intern/cycles/kernel/shaders/node_brightness.osl +++ b/intern/cycles/kernel/shaders/node_brightness.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_brightness(color ColorIn = 0.8, float Bright = 0.0, diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl index 3697bb37fd9..811182f40b5 100644 --- a/intern/cycles/kernel/shaders/node_bump.osl +++ b/intern/cycles/kernel/shaders/node_bump.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" /* "Bump Mapping Unparameterized Surfaces on the GPU" * Morten S. Mikkelsen, 2010 */ diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl index 833e9e775fe..45ca50c6e1e 100644 --- a/intern/cycles/kernel/shaders/node_camera.osl +++ b/intern/cycles/kernel/shaders/node_camera.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_camera(output vector ViewVector = vector(0.0, 0.0, 0.0), output float ViewZDepth = 0.0, diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl index e05cf20099f..d6a30dbdb40 100644 --- a/intern/cycles/kernel/shaders/node_checker_texture.osl +++ b/intern/cycles/kernel/shaders/node_checker_texture.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" /* Checker */ diff --git a/intern/cycles/kernel/shaders/node_clamp.osl b/intern/cycles/kernel/shaders/node_clamp.osl index d689ba7f809..ce9392a0d98 100644 --- a/intern/cycles/kernel/shaders/node_clamp.osl +++ b/intern/cycles/kernel/shaders/node_clamp.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_clamp(string type = "minmax", float Value = 1.0, diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl index 1658cf3d774..05e502b5bc1 100644 --- a/intern/cycles/kernel/shaders/node_combine_hsv.osl +++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_combine_hsv(float H = 0.0, float S = 0.0, float V = 0.0, output color Color = 0.8) { diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl index aaa95e9c5af..036f371eb5c 100644 --- a/intern/cycles/kernel/shaders/node_combine_rgb.osl +++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_combine_rgb(float R = 0.0, float G = 0.0, float B = 0.0, output color Image = 0.8) { diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl index 4ab49168704..4ebd86b605c 100644 --- a/intern/cycles/kernel/shaders/node_combine_xyz.osl +++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_combine_xyz(float X = 0.0, float Y = 0.0, float Z = 0.0, output vector Vector = 0.8) { diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl index 7ea9a1e4fb3..c3f0e118844 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_color.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_color(color value_color = 0.0, output string value_string = "", diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl index 13b5dea0838..61a15a1c2b0 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_float.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_float(float value_float = 0.0, output string value_string = "", diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl index a59e025d822..2e6a99b2765 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_int.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_int(int value_int = 0, output string value_string = "", diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl index 7bdd94d1941..64201d63190 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_normal(normal value_normal = normal(0.0, 0.0, 0.0), output string value_string = "", diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl index 79c1719e7a7..11d64f76d6f 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_point.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_point(point value_point = point(0.0, 0.0, 0.0), output string value_string = "", diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl b/intern/cycles/kernel/shaders/node_convert_from_string.osl index 48d894a6b3e..b496c4e6d05 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_string.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_string(string value_string = "", output color value_color = color(0.0, 0.0, 0.0), diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl index 92ab2313bcb..820faabd32b 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_convert_from_vector(vector value_vector = vector(0.0, 0.0, 0.0), output string value_string = "", diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl index bd5554b838a..f5886f534eb 100644 --- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_diffuse_bsdf(color Color = 0.8, float Roughness = 0.0, diff --git a/intern/cycles/kernel/shaders/node_displacement.osl b/intern/cycles/kernel/shaders/node_displacement.osl index a1f3b7b7737..44a4828d511 100644 --- a/intern/cycles/kernel/shaders/node_displacement.osl +++ b/intern/cycles/kernel/shaders/node_displacement.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_displacement(string space = "object", float Height = 0.0, diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl index 57973f57ac6..f289a9711d9 100644 --- a/intern/cycles/kernel/shaders/node_emission.osl +++ b/intern/cycles/kernel/shaders/node_emission.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_emission(color Color = 0.8, float Strength = 1.0, output closure color Emission = 0) { diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl index 43f607f7cb0..d04743eb368 100644 --- a/intern/cycles/kernel/shaders/node_environment_texture.osl +++ b/intern/cycles/kernel/shaders/node_environment_texture.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_color.h" +#include "stdcycles.h" vector environment_texture_direction_to_equirectangular(vector dir) { diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl index 89250db40f3..cff084c344d 100644 --- a/intern/cycles/kernel/shaders/node_fresnel.osl +++ b/intern/cycles/kernel/shaders/node_fresnel.osl @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "stdosl.h" #include "node_fresnel.h" +#include "stdcycles.h" shader node_fresnel(float IOR = 1.45, normal Normal = N, output float Fac = 0.0) { diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl index 9b9c17dc8af..0816df64fe8 100644 --- a/intern/cycles/kernel/shaders/node_gamma.osl +++ b/intern/cycles/kernel/shaders/node_gamma.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_gamma(color ColorIn = 0.8, float Gamma = 1.0, output color ColorOut = 0.0) { diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl index 3cf2e974022..55cda71db1b 100644 --- a/intern/cycles/kernel/shaders/node_geometry.osl +++ b/intern/cycles/kernel/shaders/node_geometry.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_geometry(normal NormalIn = N, string bump_offset = "center", diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl index c0b8a002536..0042d573f8d 100644 --- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_fresnel.h" +#include "stdcycles.h" shader node_glass_bsdf(color Color = 0.8, string distribution = "sharp", diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl index 2d40ee8d3f6..c73604d3650 100644 --- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "stdosl.h" #include "node_fresnel.h" +#include "stdcycles.h" shader node_glossy_bsdf(color Color = 0.8, string distribution = "GGX", diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl index 6cb181aee72..e9acebc0572 100644 --- a/intern/cycles/kernel/shaders/node_gradient_texture.osl +++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" /* Gradient */ diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl index bc912087666..3e0ac7af2e0 100644 --- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl @@ -16,7 +16,7 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_hair_bsdf(color Color = 0.8, string component = "reflection", diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl b/intern/cycles/kernel/shaders/node_hair_info.osl index 991a27c4103..ee08ea57e68 100644 --- a/intern/cycles/kernel/shaders/node_hair_info.osl +++ b/intern/cycles/kernel/shaders/node_hair_info.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_hair_info(output float IsStrand = 0.0, output float Intercept = 0.0, diff --git a/intern/cycles/kernel/shaders/node_hash.h b/intern/cycles/kernel/shaders/node_hash.h index 7affe432bf2..b42e42ff910 100644 --- a/intern/cycles/kernel/shaders/node_hash.h +++ b/intern/cycles/kernel/shaders/node_hash.h @@ -1,4 +1,4 @@ -#include "stdosl.h" +#include "stdcycles.h" #include "vector2.h" #include "vector4.h" diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl index b51bc0543a5..92e41c92f72 100644 --- a/intern/cycles/kernel/shaders/node_holdout.osl +++ b/intern/cycles/kernel/shaders/node_holdout.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_holdout(output closure color Holdout = holdout()) { diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl index 30c56a20a92..4417057b10f 100644 --- a/intern/cycles/kernel/shaders/node_hsv.osl +++ b/intern/cycles/kernel/shaders/node_hsv.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_color.h" +#include "stdcycles.h" shader node_hsv(float Hue = 0.5, float Saturation = 1.0, diff --git a/intern/cycles/kernel/shaders/node_ies_light.osl b/intern/cycles/kernel/shaders/node_ies_light.osl index 4d881eb3b65..76348b4d758 100644 --- a/intern/cycles/kernel/shaders/node_ies_light.osl +++ b/intern/cycles/kernel/shaders/node_ies_light.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" /* IES Light */ diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl index 9a0f2d054ea..22d34a1082c 100644 --- a/intern/cycles/kernel/shaders/node_image_texture.osl +++ b/intern/cycles/kernel/shaders/node_image_texture.osl @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "stdosl.h" #include "node_color.h" +#include "stdcycles.h" point texco_remap_square(point co) { diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl index c7d41e4e129..23c16935ca1 100644 --- a/intern/cycles/kernel/shaders/node_invert.osl +++ b/intern/cycles/kernel/shaders/node_invert.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_invert(float Fac = 1.0, color ColorIn = 0.8, output color ColorOut = 0.8) { diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl index 7c46f28b41b..1662be2cad1 100644 --- a/intern/cycles/kernel/shaders/node_layer_weight.osl +++ b/intern/cycles/kernel/shaders/node_layer_weight.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_fresnel.h" +#include "stdcycles.h" shader node_layer_weight(float Blend = 0.5, normal Normal = N, diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl index d0d7dd9c5aa..3f3c9444a5a 100644 --- a/intern/cycles/kernel/shaders/node_light_falloff.osl +++ b/intern/cycles/kernel/shaders/node_light_falloff.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_light_falloff(float Strength = 0.0, float Smooth = 0.0, diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl index c4a3624a67f..4ff06915771 100644 --- a/intern/cycles/kernel/shaders/node_light_path.osl +++ b/intern/cycles/kernel/shaders/node_light_path.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_light_path(output float IsCameraRay = 0.0, output float IsShadowRay = 0.0, diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl index 26e7d57278b..476c6895f05 100644 --- a/intern/cycles/kernel/shaders/node_magic_texture.osl +++ b/intern/cycles/kernel/shaders/node_magic_texture.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" /* Magic */ diff --git a/intern/cycles/kernel/shaders/node_map_range.osl b/intern/cycles/kernel/shaders/node_map_range.osl index 242ec4271ed..1c49027e6dd 100644 --- a/intern/cycles/kernel/shaders/node_map_range.osl +++ b/intern/cycles/kernel/shaders/node_map_range.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" float safe_divide(float a, float b) { diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl index e8a9d940eda..8d204999630 100644 --- a/intern/cycles/kernel/shaders/node_mapping.osl +++ b/intern/cycles/kernel/shaders/node_mapping.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" point safe_divide(point a, point b) { diff --git a/intern/cycles/kernel/shaders/node_math.h b/intern/cycles/kernel/shaders/node_math.h new file mode 100644 index 00000000000..4b1a6c5bc16 --- /dev/null +++ b/intern/cycles/kernel/shaders/node_math.h @@ -0,0 +1,110 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +float safe_divide(float a, float b) +{ + return (b != 0.0) ? a / b : 0.0; +} + +vector safe_divide(vector a, vector b) +{ + return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0, + (b[1] != 0.0) ? a[1] / b[1] : 0.0, + (b[2] != 0.0) ? a[2] / b[2] : 0.0); +} + +float safe_modulo(float a, float b) +{ + return (b != 0.0) ? fmod(a, b) : 0.0; +} + +float fract(float a) +{ + return a - floor(a); +} + +/* See: https://www.iquilezles.org/www/articles/smin/smin.htm. */ +float smoothmin(float a, float b, float c) +{ + if (c != 0.0) { + float h = max(c - abs(a - b), 0.0) / c; + return min(a, b) - h * h * h * c * (1.0 / 6.0); + } + else { + return min(a, b); + } +} + +float pingpong(float a, float b) +{ + return (b != 0.0) ? abs(fract((a - b) / (b * 2.0)) * b * 2.0 - b) : 0.0; +} + +float safe_sqrt(float a) +{ + return (a > 0.0) ? sqrt(a) : 0.0; +} + +float safe_log(float a, float b) +{ + return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0; +} + +vector project(vector v, vector v_proj) +{ + float lenSquared = dot(v_proj, v_proj); + return (lenSquared != 0.0) ? (dot(v, v_proj) / lenSquared) * v_proj : vector(0.0); +} + +vector snap(vector a, vector b) +{ + return floor(safe_divide(a, b)) * b; +} + +/* Adapted from godotengine math_funcs.h. */ +float wrap(float value, float max, float min) +{ + float range = max - min; + return (range != 0.0) ? 
value - (range * floor((value - min) / range)) : min; +} + +point wrap(point value, point max, point min) +{ + return point(wrap(value[0], max[0], min[0]), + wrap(value[1], max[1], min[1]), + wrap(value[2], max[2], min[2])); +} + +matrix euler_to_mat(point euler) +{ + float cx = cos(euler[0]); + float cy = cos(euler[1]); + float cz = cos(euler[2]); + float sx = sin(euler[0]); + float sy = sin(euler[1]); + float sz = sin(euler[2]); + matrix mat = matrix(1.0); + mat[0][0] = cy * cz; + mat[0][1] = cy * sz; + mat[0][2] = -sy; + mat[1][0] = sy * sx * cz - cx * sz; + mat[1][1] = sy * sx * sz + cx * cz; + mat[1][2] = cy * sx; + +mat[2][0] = sy * cx * cz + sx * sz; + mat[2][1] = sy * cx * sz - sx * cz; + mat[2][2] = cy * cx; + return mat; +} diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl index 1eccb56405b..dbaa7ccb60e 100644 --- a/intern/cycles/kernel/shaders/node_math.osl +++ b/intern/cycles/kernel/shaders/node_math.osl @@ -14,56 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" - -float safe_divide(float a, float b) -{ - return (b != 0.0) ? a / b : 0.0; -} - -float safe_modulo(float a, float b) -{ - return (b != 0.0) ? fmod(a, b) : 0.0; -} - -float fract(float a) -{ - return a - floor(a); -} - -/* Adapted from godotengine math_funcs.h. */ -float wrap(float value, float max, float min) -{ - float range = max - min; - return (range != 0.0) ? value - (range * floor((value - min) / range)) : min; -} - -/* See: https://www.iquilezles.org/www/articles/smin/smin.htm. */ -float smoothmin(float a, float b, float c) -{ - if (c != 0.0) { - float h = max(c - abs(a - b), 0.0) / c; - return min(a, b) - h * h * h * c * (1.0 / 6.0); - } - else { - return min(a, b); - } -} - -float pingpong(float a, float b) -{ - return (b != 0.0) ? abs(fract((a - b) / (b * 2.0)) * b * 2.0 - b) : 0.0; -} - -float safe_sqrt(float a) -{ - return (a > 0.0) ? 
sqrt(a) : 0.0; -} - -float safe_log(float a, float b) -{ - return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0; -} +#include "node_math.h" +#include "stdcycles.h" /* OSL asin, acos, and pow functions are safe by default. */ shader node_math(string type = "add", diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl index 9fbd3391ade..a13b4bb7b96 100644 --- a/intern/cycles/kernel/shaders/node_mix.osl +++ b/intern/cycles/kernel/shaders/node_mix.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_color.h" +#include "stdcycles.h" color node_mix_blend(float t, color col1, color col2) { diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl index 517c59c8786..94fc2171c44 100644 --- a/intern/cycles/kernel/shaders/node_mix_closure.osl +++ b/intern/cycles/kernel/shaders/node_mix_closure.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_mix_closure(float Fac = 0.5, closure color Closure1 = 0, diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl index 8861f9a671a..d03b84c1ab4 100644 --- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl +++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "stdosl.h" #include "node_noise.h" +#include "stdcycles.h" #include "vector2.h" #include "vector4.h" @@ -691,7 +691,7 @@ shader node_musgrave_texture( float Dimension = 2.0, float Scale = 5.0, float Detail = 2.0, - float Lacunarity = 1.0, + float Lacunarity = 2.0, float Offset = 0.0, float Gain = 1.0, output float Fac = 0.0) diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl index 6cff1cdab2c..4121b415673 100644 --- a/intern/cycles/kernel/shaders/node_noise_texture.osl +++ b/intern/cycles/kernel/shaders/node_noise_texture.osl @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "stdosl.h" +#include "node_noise.h" +#include "stdcycles.h" #include "vector2.h" #include "vector4.h" -#include "node_noise.h" #define vector3 point diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl index 1d20c3e7cac..a0a88445427 100644 --- a/intern/cycles/kernel/shaders/node_normal.osl +++ b/intern/cycles/kernel/shaders/node_normal.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_normal(normal direction = normal(0.0, 0.0, 0.0), normal NormalIn = normal(0.0, 0.0, 0.0), diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl index 90b593d00bc..912960f13ab 100644 --- a/intern/cycles/kernel/shaders/node_normal_map.osl +++ b/intern/cycles/kernel/shaders/node_normal_map.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_normal_map(normal NormalIn = N, float Strength = 1.0, diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl index 350404bb747..44513d9a1ba 100644 --- a/intern/cycles/kernel/shaders/node_object_info.osl +++ b/intern/cycles/kernel/shaders/node_object_info.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_object_info(output point Location = point(0.0, 0.0, 0.0), output color Color = color(1.0, 1.0, 1.0), diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl index fa7f603980b..bd60fc2b7e1 100644 --- a/intern/cycles/kernel/shaders/node_output_displacement.osl +++ b/intern/cycles/kernel/shaders/node_output_displacement.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" displacement node_output_displacement(vector Displacement = 0.0) { diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl index 013666145da..cd746f79c4a 100644 --- a/intern/cycles/kernel/shaders/node_output_surface.osl +++ b/intern/cycles/kernel/shaders/node_output_surface.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" surface node_output_surface(closure color Surface = 0) { diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl index dd479e751b3..4cc14cd6699 100644 --- a/intern/cycles/kernel/shaders/node_output_volume.osl +++ b/intern/cycles/kernel/shaders/node_output_volume.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" volume node_output_volume(closure color Volume = 0) { diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl index e286c33a1ff..2dcdf3d0f3c 100644 --- a/intern/cycles/kernel/shaders/node_particle_info.osl +++ b/intern/cycles/kernel/shaders/node_particle_info.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_particle_info(output float Index = 0.0, output float Random = 0.0, diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl index 657ced9b6e6..1711811ac65 100644 --- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_fresnel.h" +#include "stdcycles.h" shader node_principled_bsdf(string distribution = "Multiscatter GGX", string subsurface_method = "burley", diff --git a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl index bf986438fca..4cf17e0e703 100644 --- a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" color log3(color a) { diff --git a/intern/cycles/kernel/shaders/node_principled_volume.osl b/intern/cycles/kernel/shaders/node_principled_volume.osl index 39cf6837eb2..0cb4cdebdaa 100644 --- a/intern/cycles/kernel/shaders/node_principled_volume.osl +++ b/intern/cycles/kernel/shaders/node_principled_volume.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_principled_volume(color Color = color(0.5, 0.5, 0.5), float Density = 1.0, diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl index 941d99dd44d..9e9b31d9a87 100644 --- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_refraction_bsdf(color Color = 0.8, string distribution = "sharp", diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl index e34eb027cc3..8850040d580 100644 --- a/intern/cycles/kernel/shaders/node_rgb_curves.osl +++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_ramp_util.h" +#include "stdcycles.h" shader node_rgb_curves(color ramp[] = {0.0}, float min_x = 0.0, diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl index c9f9746a4fb..2131edb2688 100644 --- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl +++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_ramp_util.h" +#include "stdcycles.h" shader node_rgb_ramp(color ramp_color[] = {0.0}, float ramp_alpha[] = {0.0}, diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl index 837d6caf5fc..f0a094d5b57 100644 --- a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl +++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_rgb_to_bw(color Color = 0.0, output float Val = 0.0) { diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl index fce5716f372..36ad952dee6 100644 --- a/intern/cycles/kernel/shaders/node_scatter_volume.osl +++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_scatter_volume(color Color = color(0.8, 0.8, 0.8), float Density = 1.0, diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl index c77ed1f3755..2f902b72dbc 100644 --- a/intern/cycles/kernel/shaders/node_separate_hsv.osl +++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_color.h" +#include "stdcycles.h" shader node_separate_hsv(color Color = 0.8, output float H = 0.0, diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl index ee64add27e2..62e4aedb879 100644 --- a/intern/cycles/kernel/shaders/node_separate_rgb.osl +++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_separate_rgb(color Image = 0.8, output float R = 0.0, diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl index 8a563f5e920..acaf3942b6f 100644 --- a/intern/cycles/kernel/shaders/node_separate_xyz.osl +++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_separate_xyz(vector Vector = 0.8, output float X = 0.0, diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl index 9541b829ef7..26a97e2b5d1 100644 --- a/intern/cycles/kernel/shaders/node_set_normal.osl +++ b/intern/cycles/kernel/shaders/node_set_normal.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" surface node_set_normal(normal Direction = N, output normal Normal = N) { diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl index 9b29e5489c2..4def237a2e0 100644 --- a/intern/cycles/kernel/shaders/node_sky_texture.osl +++ b/intern/cycles/kernel/shaders/node_sky_texture.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_color.h" +#include "stdcycles.h" float sky_angle_between(float thetav, float phiv, float theta, float phi) { diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl index e12199d8c3d..b1e854150ab 100644 --- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl +++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_subsurface_scattering(color Color = 0.8, float Scale = 1.0, diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl index 44eb9973f3d..83f19a4610b 100644 --- a/intern/cycles/kernel/shaders/node_tangent.osl +++ b/intern/cycles/kernel/shaders/node_tangent.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_tangent(normal NormalIn = N, string attr_name = "geom:tangent", diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl index 13861653d04..ac05e984af2 100644 --- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl +++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_texture_coordinate( normal NormalIn = N, diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl index ed3a0b25c60..4a44730c70c 100644 --- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_toon_bsdf(color Color = 0.8, string component = "diffuse", diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl index 7ce1ab08c59..23a562bf34d 100644 --- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_translucent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0) { diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl index a735513ba89..eb737a05c41 100644 --- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_transparent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0) { diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl index 6f2887be63c..88d8c5ba394 100644 --- a/intern/cycles/kernel/shaders/node_uv_map.osl +++ b/intern/cycles/kernel/shaders/node_uv_map.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_uv_map(int from_dupli = 0, string attribute = "", diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl index 398e2c0e392..13197b9a27a 100644 --- a/intern/cycles/kernel/shaders/node_value.osl +++ b/intern/cycles/kernel/shaders/node_value.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_value(float value_value = 0.0, vector vector_value = vector(0.0, 0.0, 0.0), diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl index e8c8036b550..9d3a2b82b0a 100644 --- a/intern/cycles/kernel/shaders/node_vector_curves.osl +++ b/intern/cycles/kernel/shaders/node_vector_curves.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_ramp_util.h" +#include "stdcycles.h" shader node_vector_curves(color ramp[] = {0.0}, float min_x = 0.0, diff --git a/intern/cycles/kernel/shaders/node_vector_displacement.osl b/intern/cycles/kernel/shaders/node_vector_displacement.osl index e9bd336347f..7cd9c2a37f2 100644 --- a/intern/cycles/kernel/shaders/node_vector_displacement.osl +++ b/intern/cycles/kernel/shaders/node_vector_displacement.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_vector_displacement(color Vector = color(0.0, 0.0, 0.0), float Midlevel = 0.0, diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl index 4fa9b3bb57b..218851598b4 100644 --- a/intern/cycles/kernel/shaders/node_vector_math.osl +++ b/intern/cycles/kernel/shaders/node_vector_math.osl @@ -14,34 +14,13 @@ * limitations under the License. */ -#include "stdosl.h" - -float safe_divide(float a, float b) -{ - return (b != 0.0) ? a / b : 0.0; -} - -vector safe_divide(vector a, vector b) -{ - return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0, - (b[1] != 0.0) ? a[1] / b[1] : 0.0, - (b[2] != 0.0) ? a[2] / b[2] : 0.0); -} - -vector project(vector v, vector v_proj) -{ - float lenSquared = dot(v_proj, v_proj); - return (lenSquared != 0.0) ? (dot(v, v_proj) / lenSquared) * v_proj : vector(0.0); -} - -vector snap(vector a, vector b) -{ - return floor(safe_divide(a, b)) * b; -} +#include "node_math.h" +#include "stdcycles.h" shader node_vector_math(string type = "add", vector Vector1 = vector(0.0, 0.0, 0.0), vector Vector2 = vector(0.0, 0.0, 0.0), + vector Vector3 = vector(0.0, 0.0, 0.0), float Scale = 1.0, output float Value = 0.0, output vector Vector = vector(0.0, 0.0, 0.0)) @@ -94,6 +73,9 @@ shader node_vector_math(string type = "add", else if (type == "modulo") { Vector = fmod(Vector1, Vector2); } + else if (type == "wrap") { + Vector = wrap(Vector1, Vector2, Vector3); + } else if (type == "fraction") { Vector = Vector1 - floor(Vector1); } @@ -106,6 +88,15 @@ shader node_vector_math(string type = "add", else if (type == "maximum") { Vector = max(Vector1, Vector2); } + else if (type == "sine") { + Vector = sin(Vector1); + } + else if (type == "cosine") { + Vector = cos(Vector1); + } + else if (type == "tangent") { + Vector = tan(Vector1); + } else { warning("%s", "Unknown vector math operator!"); } diff --git a/intern/cycles/kernel/shaders/node_vector_rotate.osl 
b/intern/cycles/kernel/shaders/node_vector_rotate.osl new file mode 100644 index 00000000000..2efe3470ae2 --- /dev/null +++ b/intern/cycles/kernel/shaders/node_vector_rotate.osl @@ -0,0 +1,49 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "node_math.h" +#include "stdcycles.h" + +shader node_vector_rotate(int invert = 0, + string type = "axis", + vector VectorIn = vector(0.0, 0.0, 0.0), + point Center = point(0.0, 0.0, 0.0), + point Rotation = point(0.0, 0.0, 0.0), + vector Axis = vector(0.0, 0.0, 1.0), + float Angle = 0.0, + output vector VectorOut = vector(0.0, 0.0, 0.0)) +{ + if (type == "euler_xyz") { + matrix rmat = (invert) ? transpose(euler_to_mat(Rotation)) : euler_to_mat(Rotation); + VectorOut = transform(rmat, VectorIn - Center) + Center; + } + else { + float a = (invert) ? -Angle : Angle; + if (type == "x_axis") { + VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(1.0, 0.0, 0.0)) + Center; + } + else if (type == "y_axis") { + VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 1.0, 0.0)) + Center; + } + else if (type == "z_axis") { + VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 0.0, 1.0)) + Center; + } + else { // axis + VectorOut = (length(Axis) != 0.0) ? 
rotate(VectorIn - Center, a, point(0.0), Axis) + Center : + VectorIn; + } + } +} diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl index 22939577be0..1db799cfc9e 100644 --- a/intern/cycles/kernel/shaders/node_vector_transform.osl +++ b/intern/cycles/kernel/shaders/node_vector_transform.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_vector_transform(string type = "vector", string convert_from = "world", diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl index 9290b845325..299acef35ee 100644 --- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "node_fresnel.h" +#include "stdcycles.h" shader node_velvet_bsdf(color Color = 0.8, float Sigma = 0.0, diff --git a/intern/cycles/kernel/shaders/node_vertex_color.osl b/intern/cycles/kernel/shaders/node_vertex_color.osl index 16bf3dd146e..ffaf7a2f720 100644 --- a/intern/cycles/kernel/shaders/node_vertex_color.osl +++ b/intern/cycles/kernel/shaders/node_vertex_color.osl @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "stdcycles.h" shader node_vertex_color(string bump_offset = "center", string layer_name = "", @@ -22,7 +22,16 @@ shader node_vertex_color(string bump_offset = "center", output float Alpha = 0.0) { float vertex_color[4]; - if (getattribute(layer_name, vertex_color)) { + string vertex_color_layer; + + if (layer_name == "") { + vertex_color_layer = "geom:vertex_color"; + } + else { + vertex_color_layer = layer_name; + } + + if (getattribute(vertex_color_layer, vertex_color)) { Color = color(vertex_color[0], vertex_color[1], vertex_color[2]); Alpha = vertex_color[3]; diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl index 10a9f7a6329..04d61c32f8a 100644 --- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl +++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "stdosl.h" +#include "node_hash.h" +#include "stdcycles.h" #include "vector2.h" #include "vector4.h" -#include "node_hash.h" #define vector3 point diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl index 0e4484561d8..14489298367 100644 --- a/intern/cycles/kernel/shaders/node_voxel_texture.osl +++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_voxel_texture(string filename = "", string interpolation = "linear", diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl index a706c442368..f17397be243 100644 --- a/intern/cycles/kernel/shaders/node_wave_texture.osl +++ b/intern/cycles/kernel/shaders/node_wave_texture.osl @@ -14,45 +14,86 @@ * limitations under the License. 
*/ -#include "stdosl.h" #include "node_noise.h" +#include "stdcycles.h" /* Wave */ -float wave(point p, string type, string profile, float detail, float distortion, float dscale) +float wave(point p_input, + string type, + string bands_direction, + string rings_direction, + string profile, + float detail, + float distortion, + float dscale, + float phase) { + /* Prevent precision issues on unit coordinates. */ + point p = (p_input + 0.000001) * 0.999999; + float n = 0.0; if (type == "bands") { - n = (p[0] + p[1] + p[2]) * 10.0; + if (bands_direction == "x") { + n = p[0] * 20.0; + } + else if (bands_direction == "y") { + n = p[1] * 20.0; + } + else if (bands_direction == "z") { + n = p[2] * 20.0; + } + else { /* diagonal */ + n = (p[0] + p[1] + p[2]) * 10.0; + } } else if (type == "rings") { - n = length(p) * 20.0; + point rp = p; + if (rings_direction == "x") { + rp *= point(0.0, 1.0, 1.0); + } + else if (rings_direction == "y") { + rp *= point(1.0, 0.0, 1.0); + } + else if (rings_direction == "z") { + rp *= point(1.0, 1.0, 0.0); + } + /* else: "spherical" */ + + n = length(rp) * 20.0; } + n += phase; + if (distortion != 0.0) { n = n + (distortion * (fractal_noise(p * dscale, detail) * 2.0 - 1.0)); } if (profile == "sine") { - return 0.5 + 0.5 * sin(n); + return 0.5 + 0.5 * sin(n - M_PI_2); + } + else if (profile == "saw") { + n /= M_2PI; + return n - floor(n); } - else { - /* Saw profile */ + else { /* profile tri */ n /= M_2PI; - n -= (int)n; - return (n < 0.0) ? 
n + 1.0 : n; + return abs(n - floor(n + 0.5)) * 2.0; } } shader node_wave_texture(int use_mapping = 0, matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), string type = "bands", + string bands_direction = "x", + string rings_direction = "x", string profile = "sine", float Scale = 5.0, float Distortion = 0.0, float Detail = 2.0, float DetailScale = 1.0, + float PhaseOffset = 0.0, point Vector = P, output float Fac = 0.0, output color Color = 0.0) @@ -62,6 +103,14 @@ shader node_wave_texture(int use_mapping = 0, if (use_mapping) p = transform(mapping, p); - Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale); + Fac = wave(p * Scale, + type, + bands_direction, + rings_direction, + profile, + Detail, + Distortion, + DetailScale, + PhaseOffset); Color = Fac; } diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl index c8c6eecb171..f484c4b4788 100644 --- a/intern/cycles/kernel/shaders/node_wavelength.osl +++ b/intern/cycles/kernel/shaders/node_wavelength.osl @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "stdosl.h" +#include "stdcycles.h" shader node_wavelength(float Wavelength = 500.0, output color Color = 0.0) { diff --git a/intern/cycles/kernel/shaders/node_white_noise_texture.osl b/intern/cycles/kernel/shaders/node_white_noise_texture.osl index 95f91d25e5e..94735a019d5 100644 --- a/intern/cycles/kernel/shaders/node_white_noise_texture.osl +++ b/intern/cycles/kernel/shaders/node_white_noise_texture.osl @@ -14,10 +14,10 @@ * limitations under the License. 
*/ -#include "stdosl.h" +#include "node_hash.h" +#include "stdcycles.h" #include "vector2.h" #include "vector4.h" -#include "node_hash.h" #define vector3 point diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl index ea4bd3a4c87..673a451c928 100644 --- a/intern/cycles/kernel/shaders/node_wireframe.osl +++ b/intern/cycles/kernel/shaders/node_wireframe.osl @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "stdosl.h" #include "oslutil.h" +#include "stdcycles.h" shader node_wireframe(string bump_offset = "center", int use_pixel_size = 0, diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h deleted file mode 100644 index d48bfa4a665..00000000000 --- a/intern/cycles/kernel/shaders/oslutil.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef CCL_OSLUTIL_H -#define CCL_OSLUTIL_H - -// Return wireframe opacity factor [0, 1] given a geometry type in -// ("triangles", "polygons" or "patches"), and a line_width in raster -// or world space depending on the last (raster) boolean argument. 
-// -float wireframe(string edge_type, float line_width, int raster) -{ - // ray differentials are so big in diffuse context that this function would always return "wire" - if (raytype("path:diffuse")) - return 0.0; - - int np = 0; - point p[64]; - float pixelWidth = 1; - - if (edge_type == "triangles") { - np = 3; - if (!getattribute("geom:trianglevertices", p)) - return 0.0; - } - else if (edge_type == "polygons" || edge_type == "patches") { - getattribute("geom:numpolyvertices", np); - if (np < 3 || !getattribute("geom:polyvertices", p)) - return 0.0; - } - - if (raster) { - // Project the derivatives of P to the viewing plane defined - // by I so we have a measure of how big is a pixel at this point - float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I); - float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I); - // Take the average of both axis' length - pixelWidth = (pixelWidthX + pixelWidthY) / 2; - } - - // Use half the width as the neighbor face will render the - // other half. And take the square for fast comparison - pixelWidth *= 0.5 * line_width; - pixelWidth *= pixelWidth; - for (int i = 0; i < np; i++) { - int i2 = i ? i - 1 : np - 1; - vector dir = P - p[i]; - vector edge = p[i] - p[i2]; - vector crs = cross(edge, dir); - // At this point dot(crs, crs) / dot(edge, edge) is - // the square of area / length(edge) == square of the - // distance to the edge. 
- if (dot(crs, crs) < (dot(edge, edge) * pixelWidth)) - return 1; - } - return 0; -} - -float wireframe(string edge_type, float line_width) -{ - return wireframe(edge_type, line_width, 1); -} -float wireframe(string edge_type) -{ - return wireframe(edge_type, 1.0, 1); -} -float wireframe() -{ - return wireframe("polygons", 1.0, 1); -} - -#endif /* CCL_OSLUTIL_H */ diff --git a/intern/cycles/kernel/shaders/stdcycles.h b/intern/cycles/kernel/shaders/stdcycles.h new file mode 100644 index 00000000000..dd604da68ce --- /dev/null +++ b/intern/cycles/kernel/shaders/stdcycles.h @@ -0,0 +1,150 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Sony Pictures Imageworks nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CCL_STDCYCLESOSL_H +#define CCL_STDCYCLESOSL_H + +#include "stdosl.h" + +// Declaration of built-in functions and closures, stdosl.h does not make +// these available so we have to redefine them. +#define BUILTIN [[int builtin = 1]] +#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]] + +closure color diffuse_ramp(normal N, color colors[8]) BUILTIN; +closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN; +closure color diffuse_toon(normal N, float size, float smooth) BUILTIN; +closure color glossy_toon(normal N, float size, float smooth) BUILTIN; +closure color microfacet_ggx(normal N, float ag) BUILTIN; +closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN; +closure color microfacet_ggx_refraction(normal N, float ag, float eta) BUILTIN; +closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN; +closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN; +closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN; +closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; +closure color microfacet_ggx_aniso_fresnel( + normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; +closure color +microfacet_multi_ggx_fresnel(normal N, float ag, 
float eta, color C, color Cspec0) BUILTIN; +closure color microfacet_multi_ggx_aniso_fresnel( + normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; +closure color +microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; +closure color microfacet_beckmann(normal N, float ab) BUILTIN; +closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN; +closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN; +closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN; +closure color ashikhmin_velvet(normal N, float sigma) BUILTIN; +closure color ambient_occlusion() BUILTIN; +closure color principled_diffuse(normal N, float roughness) BUILTIN; +closure color principled_sheen(normal N) BUILTIN; +closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN; + +// BSSRDF +closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN; + +// Hair +closure color +hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; +closure color +hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; +closure color principled_hair(normal N, + color sigma, + float roughnessu, + float roughnessv, + float coat, + float alpha, + float eta) BUILTIN; + +// Volume +closure color henyey_greenstein(float g) BUILTIN; +closure color absorption() BUILTIN; + +normal ensure_valid_reflection(normal Ng, vector I, normal N) +{ + /* The implementation here mirrors the one in kernel_montecarlo.h, + * check there for an explanation of the algorithm. 
*/ + + float sqr(float x) + { + return x * x; + } + + vector R = 2 * dot(N, I) * N - I; + + float threshold = min(0.9 * dot(Ng, I), 0.01); + if (dot(Ng, R) >= threshold) { + return N; + } + + float NdotNg = dot(N, Ng); + vector X = normalize(N - NdotNg * Ng); + + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; + + float b = sqrt(Ix2 * (a - sqr(threshold))); + float c = Iz * threshold + a; + + float fac = 0.5 / a; + float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); + int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); + int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); + + float N_new_x, N_new_z; + if (valid1 && valid2) { + float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); + float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); + + float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz; + float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz; + + valid1 = (R1 >= 1e-5); + valid2 = (R2 >= 1e-5); + if (valid1 && valid2) { + N_new_x = (R1 < R2) ? N1_x : N2_x; + N_new_z = (R1 < R2) ? N1_z : N2_z; + } + else { + N_new_x = (R1 > R2) ? N1_x : N2_x; + N_new_z = (R1 > R2) ? N1_z : N2_z; + } + } + else if (valid1 || valid2) { + float Nz2 = valid1 ? N1_z2 : N2_z2; + N_new_x = sqrt(1.0 - Nz2); + N_new_z = sqrt(Nz2); + } + else { + return Ng; + } + + return N_new_x * X + N_new_z * Ng; +} + +#endif /* CCL_STDOSL_H */ diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h deleted file mode 100644 index 6515d914909..00000000000 --- a/intern/cycles/kernel/shaders/stdosl.h +++ /dev/null @@ -1,880 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of Sony Pictures Imageworks nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CCL_STDOSL_H -#define CCL_STDOSL_H - -#ifndef M_PI -# define M_PI 3.1415926535897932 /* pi */ -# define M_PI_2 1.5707963267948966 /* pi/2 */ -# define M_PI_4 0.7853981633974483 /* pi/4 */ -# define M_2_PI 0.6366197723675813 /* 2/pi */ -# define M_2PI 6.2831853071795865 /* 2*pi */ -# define M_4PI 12.566370614359173 /* 4*pi */ -# define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */ -# define M_E 2.7182818284590452 /* e (Euler's number) */ -# define M_LN2 0.6931471805599453 /* ln(2) */ -# define M_LN10 2.3025850929940457 /* ln(10) */ -# define M_LOG2E 1.4426950408889634 /* log_2(e) */ -# define M_LOG10E 0.4342944819032518 /* log_10(e) */ -# define M_SQRT2 1.4142135623730950 /* sqrt(2) */ -# define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */ -#endif - -// Declaration of built-in functions and closures -#define BUILTIN [[int builtin = 1]] -#define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]] - -#define PERCOMP1(name) \ - normal name(normal x) BUILTIN; \ - vector name(vector x) BUILTIN; \ - point name(point x) BUILTIN; \ - color name(color x) BUILTIN; \ - float name(float x) BUILTIN; - -#define PERCOMP2(name) \ - normal name(normal x, normal y) BUILTIN; \ - vector name(vector x, vector y) BUILTIN; \ - point name(point x, point y) BUILTIN; \ - color name(color x, color y) BUILTIN; \ - float name(float x, float y) BUILTIN; - -#define PERCOMP2F(name) \ - normal name(normal x, float y) BUILTIN; \ - vector name(vector x, float y) BUILTIN; \ - point name(point x, float y) BUILTIN; \ - color name(color x, float y) BUILTIN; \ - float name(float x, float y) BUILTIN; - -// Basic math -normal degrees(normal x) -{ - return x * (180.0 / M_PI); -} -vector degrees(vector x) -{ - return x * (180.0 / M_PI); -} -point degrees(point x) -{ - return x * (180.0 / M_PI); -} -color degrees(color x) -{ - return x * (180.0 / M_PI); -} -float degrees(float x) -{ - return x * (180.0 / M_PI); -} -normal 
radians(normal x) -{ - return x * (M_PI / 180.0); -} -vector radians(vector x) -{ - return x * (M_PI / 180.0); -} -point radians(point x) -{ - return x * (M_PI / 180.0); -} -color radians(color x) -{ - return x * (M_PI / 180.0); -} -float radians(float x) -{ - return x * (M_PI / 180.0); -} -PERCOMP1(cos) -PERCOMP1(sin) -PERCOMP1(tan) -PERCOMP1(acos) -PERCOMP1(asin) -PERCOMP1(atan) -PERCOMP2(atan2) -PERCOMP1(cosh) -PERCOMP1(sinh) -PERCOMP1(tanh) -PERCOMP2F(pow) -PERCOMP1(exp) -PERCOMP1(exp2) -PERCOMP1(expm1) -PERCOMP1(log) -point log(point a, float b) -{ - return log(a) / log(b); -} -vector log(vector a, float b) -{ - return log(a) / log(b); -} -color log(color a, float b) -{ - return log(a) / log(b); -} -float log(float a, float b) -{ - return log(a) / log(b); -} -PERCOMP1(log2) -PERCOMP1(log10) -PERCOMP1(logb) -PERCOMP1(sqrt) -PERCOMP1(inversesqrt) -float hypot(float a, float b) -{ - return sqrt(a * a + b * b); -} -float hypot(float a, float b, float c) -{ - return sqrt(a * a + b * b + c * c); -} -PERCOMP1(abs) -int abs(int x) BUILTIN; -PERCOMP1(fabs) -int fabs(int x) BUILTIN; -PERCOMP1(sign) -PERCOMP1(floor) -PERCOMP1(ceil) -PERCOMP1(round) -PERCOMP1(trunc) -PERCOMP2(fmod) -PERCOMP2F(fmod) -int mod(int a, int b) -{ - return a - b * (int)floor(a / b); -} -point mod(point a, point b) -{ - return a - b * floor(a / b); -} -vector mod(vector a, vector b) -{ - return a - b * floor(a / b); -} -normal mod(normal a, normal b) -{ - return a - b * floor(a / b); -} -color mod(color a, color b) -{ - return a - b * floor(a / b); -} -point mod(point a, float b) -{ - return a - b * floor(a / b); -} -vector mod(vector a, float b) -{ - return a - b * floor(a / b); -} -normal mod(normal a, float b) -{ - return a - b * floor(a / b); -} -color mod(color a, float b) -{ - return a - b * floor(a / b); -} -float mod(float a, float b) -{ - return a - b * floor(a / b); -} -PERCOMP2(min) -int min(int a, int b) BUILTIN; -PERCOMP2(max) -int max(int a, int b) BUILTIN; -normal clamp(normal x, 
normal minval, normal maxval) -{ - return max(min(x, maxval), minval); -} -vector clamp(vector x, vector minval, vector maxval) -{ - return max(min(x, maxval), minval); -} -point clamp(point x, point minval, point maxval) -{ - return max(min(x, maxval), minval); -} -color clamp(color x, color minval, color maxval) -{ - return max(min(x, maxval), minval); -} -float clamp(float x, float minval, float maxval) -{ - return max(min(x, maxval), minval); -} -int clamp(int x, int minval, int maxval) -{ - return max(min(x, maxval), minval); -} -#if 0 -normal mix(normal x, normal y, normal a) -{ - return x * (1 - a) + y * a; -} -normal mix(normal x, normal y, float a) -{ - return x * (1 - a) + y * a; -} -vector mix(vector x, vector y, vector a) -{ - return x * (1 - a) + y * a; -} -vector mix(vector x, vector y, float a) -{ - return x * (1 - a) + y * a; -} -point mix(point x, point y, point a) -{ - return x * (1 - a) + y * a; -} -point mix(point x, point y, float a) -{ - return x * (1 - a) + y * a; -} -color mix(color x, color y, color a) -{ - return x * (1 - a) + y * a; -} -color mix(color x, color y, float a) -{ - return x * (1 - a) + y * a; -} -float mix(float x, float y, float a) -{ - return x * (1 - a) + y * a; -} -#else -normal mix(normal x, normal y, normal a) BUILTIN; -normal mix(normal x, normal y, float a) BUILTIN; -vector mix(vector x, vector y, vector a) BUILTIN; -vector mix(vector x, vector y, float a) BUILTIN; -point mix(point x, point y, point a) BUILTIN; -point mix(point x, point y, float a) BUILTIN; -color mix(color x, color y, color a) BUILTIN; -color mix(color x, color y, float a) BUILTIN; -float mix(float x, float y, float a) BUILTIN; -#endif -int isnan(float x) BUILTIN; -int isinf(float x) BUILTIN; -int isfinite(float x) BUILTIN; -float erf(float x) BUILTIN; -float erfc(float x) BUILTIN; - -// Vector functions - -vector cross(vector a, vector b) BUILTIN; -float dot(vector a, vector b) BUILTIN; -float length(vector v) BUILTIN; -float distance(point a, point 
b) BUILTIN; -float distance(point a, point b, point q) -{ - vector d = b - a; - float dd = dot(d, d); - if (dd == 0.0) - return distance(q, a); - float t = dot(q - a, d) / dd; - return distance(q, a + clamp(t, 0.0, 1.0) * d); -} -normal normalize(normal v) BUILTIN; -vector normalize(vector v) BUILTIN; -vector faceforward(vector N, vector I, vector Nref) BUILTIN; -vector faceforward(vector N, vector I) BUILTIN; -vector reflect(vector I, vector N) -{ - return I - 2 * dot(N, I) * N; -} -vector refract(vector I, vector N, float eta) -{ - float IdotN = dot(I, N); - float k = 1 - eta * eta * (1 - IdotN * IdotN); - return (k < 0) ? vector(0, 0, 0) : (eta * I - N * (eta * IdotN + sqrt(k))); -} -void fresnel(vector I, - normal N, - float eta, - output float Kr, - output float Kt, - output vector R, - output vector T) -{ - float sqr(float x) - { - return x * x; - } - float c = dot(I, N); - if (c < 0) - c = -c; - R = reflect(I, N); - float g = 1.0 / sqr(eta) - 1.0 + c * c; - if (g >= 0.0) { - g = sqrt(g); - float beta = g - c; - float F = (c * (g + c) - 1.0) / (c * beta + 1.0); - F = 0.5 * (1.0 + sqr(F)); - F *= sqr(beta / (g + c)); - Kr = F; - Kt = (1.0 - Kr) * eta * eta; - // OPT: the following recomputes some of the above values, but it - // gives us the same result as if the shader-writer called refract() - T = refract(I, N, eta); - } - else { - // total internal reflection - Kr = 1.0; - Kt = 0.0; - T = vector(0, 0, 0); - } -} - -void fresnel(vector I, normal N, float eta, output float Kr, output float Kt) -{ - vector R, T; - fresnel(I, N, eta, Kr, Kt, R, T); -} - -normal transform(matrix Mto, normal p) BUILTIN; -vector transform(matrix Mto, vector p) BUILTIN; -point transform(matrix Mto, point p) BUILTIN; -normal transform(string from, string to, normal p) BUILTIN; -vector transform(string from, string to, vector p) BUILTIN; -point transform(string from, string to, point p) BUILTIN; -normal transform(string to, normal p) -{ - return transform("common", to, p); -} -vector 
transform(string to, vector p) -{ - return transform("common", to, p); -} -point transform(string to, point p) -{ - return transform("common", to, p); -} - -float transformu(string tounits, float x) BUILTIN; -float transformu(string fromunits, string tounits, float x) BUILTIN; - -point rotate(point p, float angle, point a, point b) -{ - vector axis = normalize(b - a); - float cosang, sinang; - /* Older OSX has major issues with sincos() function, - * it's likely a big in OSL or LLVM. For until we've - * updated to new versions of this libraries we'll - * use a workaround to prevent possible crashes on all - * the platforms. - * - * Shouldn't be that bad because it's mainly used for - * anisotropic shader where angle is usually constant. - */ -#if 0 - sincos(angle, sinang, cosang); -#else - sinang = sin(angle); - cosang = cos(angle); -#endif - float cosang1 = 1.0 - cosang; - float x = axis[0], y = axis[1], z = axis[2]; - matrix M = matrix(x * x + (1.0 - x * x) * cosang, - x * y * cosang1 + z * sinang, - x * z * cosang1 - y * sinang, - 0.0, - x * y * cosang1 - z * sinang, - y * y + (1.0 - y * y) * cosang, - y * z * cosang1 + x * sinang, - 0.0, - x * z * cosang1 + y * sinang, - y * z * cosang1 - x * sinang, - z * z + (1.0 - z * z) * cosang, - 0.0, - 0.0, - 0.0, - 0.0, - 1.0); - return transform(M, p - a) + a; -} - -normal ensure_valid_reflection(normal Ng, vector I, normal N) -{ - /* The implementation here mirrors the one in kernel_montecarlo.h, - * check there for an explanation of the algorithm. 
*/ - - float sqr(float x) - { - return x * x; - } - - vector R = 2 * dot(N, I) * N - I; - - float threshold = min(0.9 * dot(Ng, I), 0.01); - if (dot(Ng, R) >= threshold) { - return N; - } - - float NdotNg = dot(N, Ng); - vector X = normalize(N - NdotNg * Ng); - - float Ix = dot(I, X), Iz = dot(I, Ng); - float Ix2 = sqr(Ix), Iz2 = sqr(Iz); - float a = Ix2 + Iz2; - - float b = sqrt(Ix2 * (a - sqr(threshold))); - float c = Iz * threshold + a; - - float fac = 0.5 / a; - float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); - int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); - int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); - - float N_new_x, N_new_z; - if (valid1 && valid2) { - float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); - float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); - - float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz; - float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz; - - valid1 = (R1 >= 1e-5); - valid2 = (R2 >= 1e-5); - if (valid1 && valid2) { - N_new_x = (R1 < R2) ? N1_x : N2_x; - N_new_z = (R1 < R2) ? N1_z : N2_z; - } - else { - N_new_x = (R1 > R2) ? N1_x : N2_x; - N_new_z = (R1 > R2) ? N1_z : N2_z; - } - } - else if (valid1 || valid2) { - float Nz2 = valid1 ? 
N1_z2 : N2_z2; - N_new_x = sqrt(1.0 - Nz2); - N_new_z = sqrt(Nz2); - } - else { - return Ng; - } - - return N_new_x * X + N_new_z * Ng; -} - -// Color functions - -float luminance(color c) BUILTIN; -color blackbody(float temperatureK) BUILTIN; -color wavelength_color(float wavelength_nm) BUILTIN; - -color transformc(string to, color x) -{ - color rgb_to_hsv(color rgb) - { // See Foley & van Dam - float r = rgb[0], g = rgb[1], b = rgb[2]; - float mincomp = min(r, min(g, b)); - float maxcomp = max(r, max(g, b)); - float delta = maxcomp - mincomp; // chroma - float h, s, v; - v = maxcomp; - if (maxcomp > 0) - s = delta / maxcomp; - else - s = 0; - if (s <= 0) - h = 0; - else { - if (r >= maxcomp) - h = (g - b) / delta; - else if (g >= maxcomp) - h = 2 + (b - r) / delta; - else - h = 4 + (r - g) / delta; - h /= 6; - if (h < 0) - h += 1; - } - return color(h, s, v); - } - - color rgb_to_hsl(color rgb) - { // See Foley & van Dam - // First convert rgb to hsv, then to hsl - float minval = min(rgb[0], min(rgb[1], rgb[2])); - color hsv = rgb_to_hsv(rgb); - float maxval = hsv[2]; // v == maxval - float h = hsv[0], s, l = (minval + maxval) / 2; - if (minval == maxval) - s = 0; // special 'achromatic' case, hue is 0 - else if (l <= 0.5) - s = (maxval - minval) / (maxval + minval); - else - s = (maxval - minval) / (2 - maxval - minval); - return color(h, s, l); - } - - color r; - if (to == "rgb" || to == "RGB") - r = x; - else if (to == "hsv") - r = rgb_to_hsv(x); - else if (to == "hsl") - r = rgb_to_hsl(x); - else if (to == "YIQ") - r = color(dot(vector(0.299, 0.587, 0.114), (vector)x), - dot(vector(0.596, -0.275, -0.321), (vector)x), - dot(vector(0.212, -0.523, 0.311), (vector)x)); - else if (to == "XYZ") - r = color(dot(vector(0.412453, 0.357580, 0.180423), (vector)x), - dot(vector(0.212671, 0.715160, 0.072169), (vector)x), - dot(vector(0.019334, 0.119193, 0.950227), (vector)x)); - else { - error("Unknown color space \"%s\"", to); - r = x; - } - return r; -} - -color 
transformc(string from, string to, color x) -{ - color hsv_to_rgb(color c) - { // Reference: Foley & van Dam - float h = c[0], s = c[1], v = c[2]; - color r; - if (s < 0.0001) { - r = v; - } - else { - h = 6 * (h - floor(h)); // expand to [0..6) - int hi = (int)h; - float f = h - hi; - float p = v * (1 - s); - float q = v * (1 - s * f); - float t = v * (1 - s * (1 - f)); - if (hi == 0) - r = color(v, t, p); - else if (hi == 1) - r = color(q, v, p); - else if (hi == 2) - r = color(p, v, t); - else if (hi == 3) - r = color(p, q, v); - else if (hi == 4) - r = color(t, p, v); - else - r = color(v, p, q); - } - return r; - } - - color hsl_to_rgb(color c) - { - float h = c[0], s = c[1], l = c[2]; - // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam) - float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s); - color r; - if (v <= 0) { - r = 0; - } - else { - float min = 2 * l - v; - s = (v - min) / v; - r = hsv_to_rgb(color(h, s, v)); - } - return r; - } - - color r; - if (from == "rgb" || from == "RGB") - r = x; - else if (from == "hsv") - r = hsv_to_rgb(x); - else if (from == "hsl") - r = hsl_to_rgb(x); - else if (from == "YIQ") - r = color(dot(vector(1, 0.9557, 0.6199), (vector)x), - dot(vector(1, -0.2716, -0.6469), (vector)x), - dot(vector(1, -1.1082, 1.7051), (vector)x)); - else if (from == "XYZ") - r = color(dot(vector(3.240479, -1.537150, -0.498535), (vector)x), - dot(vector(-0.969256, 1.875991, 0.041556), (vector)x), - dot(vector(0.055648, -0.204043, 1.057311), (vector)x)); - else { - error("Unknown color space \"%s\"", to); - r = x; - } - return transformc(to, r); -} - -// Matrix functions - -float determinant(matrix m) BUILTIN; -matrix transpose(matrix m) BUILTIN; - -// Pattern generation - -color step(color edge, color x) BUILTIN; -point step(point edge, point x) BUILTIN; -vector step(vector edge, vector x) BUILTIN; -normal step(normal edge, normal x) BUILTIN; -float step(float edge, float x) BUILTIN; -float smoothstep(float edge0, float 
edge1, float x) BUILTIN; - -float linearstep(float edge0, float edge1, float x) -{ - float result; - if (edge0 != edge1) { - float xclamped = clamp(x, edge0, edge1); - result = (xclamped - edge0) / (edge1 - edge0); - } - else { // special case: edges coincide - result = step(edge0, x); - } - return result; -} - -float smooth_linearstep(float edge0, float edge1, float x_, float eps_) -{ - float result; - if (edge0 != edge1) { - float rampup(float x, float r) - { - return 0.5 / r * x * x; - } - float width_inv = 1.0 / (edge1 - edge0); - float eps = eps_ * width_inv; - float x = (x_ - edge0) * width_inv; - if (x <= -eps) - result = 0; - else if (x >= eps && x <= 1.0 - eps) - result = x; - else if (x >= 1.0 + eps) - result = 1; - else if (x < eps) - result = rampup(x + eps, 2.0 * eps); - else /* if (x < 1.0+eps) */ - result = 1.0 - rampup(1.0 + eps - x, 2.0 * eps); - } - else { - result = step(edge0, x_); - } - return result; -} - -float aastep(float edge, float s, float dedge, float ds) -{ - // Box filtered AA step - float width = fabs(dedge) + fabs(ds); - float halfwidth = 0.5 * width; - float e1 = edge - halfwidth; - return (s <= e1) ? 0.0 : ((s >= (edge + halfwidth)) ? 
1.0 : (s - e1) / width); -} -float aastep(float edge, float s, float ds) -{ - return aastep(edge, s, filterwidth(edge), ds); -} -float aastep(float edge, float s) -{ - return aastep(edge, s, filterwidth(edge), filterwidth(s)); -} - -// Derivatives and area operators - -// Displacement functions - -// String functions -int strlen(string s) BUILTIN; -int hash(string s) BUILTIN; -int getchar(string s, int index) BUILTIN; -int startswith(string s, string prefix) BUILTIN; -int endswith(string s, string suffix) BUILTIN; -string substr(string s, int start, int len) BUILTIN; -string substr(string s, int start) -{ - return substr(s, start, strlen(s)); -} -float stof(string str) BUILTIN; -int stoi(string str) BUILTIN; - -// Define concat in terms of shorter concat -string concat(string a, string b, string c) -{ - return concat(concat(a, b), c); -} -string concat(string a, string b, string c, string d) -{ - return concat(concat(a, b, c), d); -} -string concat(string a, string b, string c, string d, string e) -{ - return concat(concat(a, b, c, d), e); -} -string concat(string a, string b, string c, string d, string e, string f) -{ - return concat(concat(a, b, c, d, e), f); -} - -// Texture - -// Closures - -closure color diffuse(normal N) BUILTIN; -closure color oren_nayar(normal N, float sigma) BUILTIN; -closure color diffuse_ramp(normal N, color colors[8]) BUILTIN; -closure color phong_ramp(normal N, float exponent, color colors[8]) BUILTIN; -closure color diffuse_toon(normal N, float size, float smooth) BUILTIN; -closure color glossy_toon(normal N, float size, float smooth) BUILTIN; -closure color translucent(normal N) BUILTIN; -closure color reflection(normal N) BUILTIN; -closure color refraction(normal N, float eta) BUILTIN; -closure color transparent() BUILTIN; -closure color microfacet_ggx(normal N, float ag) BUILTIN; -closure color microfacet_ggx_aniso(normal N, vector T, float ax, float ay) BUILTIN; -closure color microfacet_ggx_refraction(normal N, float ag, float 
eta) BUILTIN; -closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN; -closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN; -closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN; -closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_ggx_aniso_fresnel( - normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; -closure color -microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_multi_ggx_aniso_fresnel( - normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; -closure color -microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_beckmann(normal N, float ab) BUILTIN; -closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN; -closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN; -closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN; -closure color ashikhmin_velvet(normal N, float sigma) BUILTIN; -closure color emission() BUILTIN; -closure color background() BUILTIN; -closure color holdout() BUILTIN; -closure color ambient_occlusion() BUILTIN; -closure color principled_diffuse(normal N, float roughness) BUILTIN; -closure color principled_sheen(normal N) BUILTIN; -closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_roughness) BUILTIN; - -// BSSRDF -closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN; - -// Hair -closure color -hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; -closure color -hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; -closure color principled_hair(normal N, - 
color sigma, - float roughnessu, - float roughnessv, - float coat, - float alpha, - float eta) BUILTIN; - -// Volume -closure color henyey_greenstein(float g) BUILTIN; -closure color absorption() BUILTIN; - -// OSL 1.5 Microfacet functions -closure color microfacet( - string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) -{ - /* GGX */ - if (distribution == "ggx" || distribution == "default") { - if (!refract) { - if (xalpha == yalpha) { - /* Isotropic */ - return microfacet_ggx(N, xalpha); - } - else { - /* Anisotropic */ - return microfacet_ggx_aniso(N, U, xalpha, yalpha); - } - } - else { - return microfacet_ggx_refraction(N, xalpha, eta); - } - } - /* Beckmann */ - else { - if (!refract) { - if (xalpha == yalpha) { - /* Isotropic */ - return microfacet_beckmann(N, xalpha); - } - else { - /* Anisotropic */ - return microfacet_beckmann_aniso(N, U, xalpha, yalpha); - } - } - else { - return microfacet_beckmann_refraction(N, xalpha, eta); - } - } -} - -closure color microfacet(string distribution, normal N, float alpha, float eta, int refract) -{ - return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract); -} - -// Renderer state -int backfacing() BUILTIN; -int raytype(string typename) BUILTIN; -// the individual 'isFOOray' functions are deprecated -int iscameraray() -{ - return raytype("camera"); -} -int isdiffuseray() -{ - return raytype("diffuse"); -} -int isglossyray() -{ - return raytype("glossy"); -} -int isshadowray() -{ - return raytype("shadow"); -} -int getmatrix(string fromspace, string tospace, output matrix M) BUILTIN; -int getmatrix(string fromspace, output matrix M) -{ - return getmatrix(fromspace, "common", M); -} - -// Miscellaneous - -#undef BUILTIN -#undef BUILTIN_DERIV -#undef PERCOMP1 -#undef PERCOMP2 -#undef PERCOMP2F - -#endif /* CCL_STDOSL_H */ diff --git a/intern/cycles/kernel/shaders/vector2.h b/intern/cycles/kernel/shaders/vector2.h deleted file mode 100644 index 
c524735d892..00000000000 --- a/intern/cycles/kernel/shaders/vector2.h +++ /dev/null @@ -1,291 +0,0 @@ -// Open Shading Language : Copyright (c) 2009-2017 Sony Pictures Imageworks Inc., et al. -// https://github.com/imageworks/OpenShadingLanguage/blob/master/LICENSE - -#pragma once -#define VECTOR2_H - -// vector2 is a 2D vector -struct vector2 { - float x; - float y; -}; - -// -// For vector2, define math operators to match vector -// - -vector2 __operator__neg__(vector2 a) -{ - return vector2(-a.x, -a.y); -} - -vector2 __operator__add__(vector2 a, vector2 b) -{ - return vector2(a.x + b.x, a.y + b.y); -} - -vector2 __operator__add__(vector2 a, int b) -{ - return a + vector2(b, b); -} - -vector2 __operator__add__(vector2 a, float b) -{ - return a + vector2(b, b); -} - -vector2 __operator__add__(int a, vector2 b) -{ - return vector2(a, a) + b; -} - -vector2 __operator__add__(float a, vector2 b) -{ - return vector2(a, a) + b; -} - -vector2 __operator__sub__(vector2 a, vector2 b) -{ - return vector2(a.x - b.x, a.y - b.y); -} - -vector2 __operator__sub__(vector2 a, int b) -{ - return a - vector2(b, b); -} - -vector2 __operator__sub__(vector2 a, float b) -{ - return a - vector2(b, b); -} - -vector2 __operator__sub__(int a, vector2 b) -{ - return vector2(a, a) - b; -} - -vector2 __operator__sub__(float a, vector2 b) -{ - return vector2(a, a) - b; -} - -vector2 __operator__mul__(vector2 a, vector2 b) -{ - return vector2(a.x * b.x, a.y * b.y); -} - -vector2 __operator__mul__(vector2 a, int b) -{ - return a * vector2(b, b); -} - -vector2 __operator__mul__(vector2 a, float b) -{ - return a * vector2(b, b); -} - -vector2 __operator__mul__(int a, vector2 b) -{ - return b * vector2(a, a); -} - -vector2 __operator__mul__(float a, vector2 b) -{ - return b * vector2(a, a); -} - -vector2 __operator__div__(vector2 a, vector2 b) -{ - return vector2(a.x / b.x, a.y / b.y); -} - -vector2 __operator__div__(vector2 a, int b) -{ - float b_inv = 1 / b; - return a * vector2(b_inv, b_inv); -} 
- -vector2 __operator__div__(vector2 a, float b) -{ - float b_inv = 1 / b; - return a * vector2(b_inv, b_inv); -} - -vector2 __operator__div__(int a, vector2 b) -{ - return vector2(a, a) / b; -} - -vector2 __operator__div__(float a, vector2 b) -{ - return vector2(a, a) / b; -} - -int __operator__eq__(vector2 a, vector2 b) -{ - return (a.x == b.x) && (a.y == b.y); -} - -int __operator__ne__(vector2 a, vector2 b) -{ - return (a.x != b.x) || (a.y != b.y); -} - -// -// For vector2, define most of the stdosl functions to match vector -// - -vector2 abs(vector2 a) -{ - return vector2(abs(a.x), abs(a.y)); -} - -vector2 ceil(vector2 a) -{ - return vector2(ceil(a.x), ceil(a.y)); -} - -vector2 floor(vector2 a) -{ - return vector2(floor(a.x), floor(a.y)); -} - -vector2 sqrt(vector2 a) -{ - return vector2(sqrt(a.x), sqrt(a.y)); -} - -vector2 exp(vector2 a) -{ - return vector2(exp(a.x), exp(a.y)); -} - -vector2 log(vector2 a) -{ - return vector2(log(a.x), log(a.y)); -} - -vector2 log2(vector2 a) -{ - return vector2(log2(a.x), log2(a.y)); -} - -vector2 mix(vector2 a, vector2 b, float x) -{ - return vector2(mix(a.x, b.x, x), mix(a.y, b.y, x)); -} - -float dot(vector2 a, vector2 b) -{ - return (a.x * b.x + a.y * b.y); -} - -float length(vector2 a) -{ - return hypot(a.x, a.y); -} - -vector2 smoothstep(vector2 low, vector2 high, vector2 in) -{ - return vector2(smoothstep(low.x, high.x, in.x), smoothstep(low.y, high.y, in.y)); -} - -vector2 smoothstep(float low, float high, vector2 in) -{ - return vector2(smoothstep(low, high, in.x), smoothstep(low, high, in.y)); -} - -vector2 clamp(vector2 in, vector2 low, vector2 high) -{ - return vector2(clamp(in.x, low.x, high.x), clamp(in.y, low.y, high.y)); -} - -vector2 clamp(vector2 in, float low, float high) -{ - return clamp(in, vector2(low, low), vector2(high, high)); -} - -vector2 max(vector2 a, vector2 b) -{ - return vector2(max(a.x, b.x), max(a.y, b.y)); -} - -vector2 max(vector2 a, float b) -{ - return max(a, vector2(b, b)); -} - 
-vector2 normalize(vector2 a) -{ - return a / length(a); -} - -vector2 min(vector2 a, vector2 b) -{ - return vector2(min(a.x, a.x), min(b.y, b.y)); -} - -vector2 min(vector2 a, float b) -{ - return min(a, vector2(b, b)); -} - -vector2 fmod(vector2 a, vector2 b) -{ - return vector2(fmod(a.x, b.x), fmod(a.y, b.y)); -} - -vector2 fmod(vector2 a, float b) -{ - return fmod(a, vector2(b, b)); -} - -vector2 pow(vector2 in, vector2 amount) -{ - return vector2(pow(in.x, amount.x), pow(in.y, amount.y)); -} - -vector2 pow(vector2 in, float amount) -{ - return pow(in, vector2(amount, amount)); -} - -vector2 sign(vector2 a) -{ - return vector2(sign(a.x), sign(a.y)); -} - -vector2 sin(vector2 a) -{ - return vector2(sin(a.x), sin(a.y)); -} - -vector2 cos(vector2 a) -{ - return vector2(cos(a.x), cos(a.y)); -} - -vector2 tan(vector2 a) -{ - return vector2(tan(a.x), tan(a.y)); -} - -vector2 asin(vector2 a) -{ - return vector2(asin(a.x), asin(a.y)); -} - -vector2 acos(vector2 a) -{ - return vector2(acos(a.x), acos(a.y)); -} - -vector2 atan2(vector2 a, float f) -{ - return vector2(atan2(a.x, f), atan2(a.y, f)); -} - -vector2 atan2(vector2 a, vector2 b) -{ - return vector2(atan2(a.x, b.x), atan2(a.y, b.y)); -} diff --git a/intern/cycles/kernel/shaders/vector4.h b/intern/cycles/kernel/shaders/vector4.h deleted file mode 100644 index 58e1b3c2e23..00000000000 --- a/intern/cycles/kernel/shaders/vector4.h +++ /dev/null @@ -1,327 +0,0 @@ -// Open Shading Language : Copyright (c) 2009-2017 Sony Pictures Imageworks Inc., et al. 
-// https://github.com/imageworks/OpenShadingLanguage/blob/master/LICENSE - -#pragma once -#define VECTOR4_H - -// vector4 is a 4D vector -struct vector4 { - float x; - float y; - float z; - float w; -}; - -// -// For vector4, define math operators to match vector -// - -vector4 __operator__neg__(vector4 a) -{ - return vector4(-a.x, -a.y, -a.z, -a.w); -} - -vector4 __operator__add__(vector4 a, vector4 b) -{ - return vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} - -vector4 __operator__add__(vector4 a, int b) -{ - return a + vector4(b, b, b, b); -} - -vector4 __operator__add__(vector4 a, float b) -{ - return a + vector4(b, b, b, b); -} - -vector4 __operator__add__(int a, vector4 b) -{ - return vector4(a, a, a, a) + b; -} - -vector4 __operator__add__(float a, vector4 b) -{ - return vector4(a, a, a, a) + b; -} - -vector4 __operator__sub__(vector4 a, vector4 b) -{ - return vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} - -vector4 __operator__sub__(vector4 a, int b) -{ - return a - vector4(b, b, b, b); -} - -vector4 __operator__sub__(vector4 a, float b) -{ - return a - vector4(b, b, b, b); -} - -vector4 __operator__sub__(int a, vector4 b) -{ - return vector4(a, a, a, a) - b; -} - -vector4 __operator__sub__(float a, vector4 b) -{ - return vector4(a, a, a, a) - b; -} - -vector4 __operator__mul__(vector4 a, vector4 b) -{ - return vector4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} - -vector4 __operator__mul__(vector4 a, int b) -{ - return a * vector4(b, b, b, b); -} - -vector4 __operator__mul__(vector4 a, float b) -{ - return a * vector4(b, b, b, b); -} - -vector4 __operator__mul__(int a, vector4 b) -{ - return vector4(a, a, a, a) * b; -} - -vector4 __operator__mul__(float a, vector4 b) -{ - return vector4(a, a, a, a) * b; -} - -vector4 __operator__div__(vector4 a, vector4 b) -{ - return vector4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); -} - -vector4 __operator__div__(vector4 a, int b) -{ - float b_inv = 1 / b; - return a * vector4(b_inv, b_inv, 
b_inv, b_inv); -} - -vector4 __operator__div__(vector4 a, float b) -{ - float b_inv = 1 / b; - return a * vector4(b_inv, b_inv, b_inv, b_inv); -} - -vector4 __operator__div__(int a, vector4 b) -{ - return vector4(a, a, a, a) / b; -} - -vector4 __operator__div__(float a, vector4 b) -{ - return vector4(a, a, a, a) / b; -} - -int __operator__eq__(vector4 a, vector4 b) -{ - return (a.x == b.x) && (a.y == b.y) && (a.z == b.z) && (a.w == b.w); -} - -int __operator__ne__(vector4 a, vector4 b) -{ - return (a.x != b.x) || (a.y != b.y) || (a.z != b.z) || (a.w != b.w); -} - -// -// For vector4, define most of the stdosl functions to match vector -// - -vector4 abs(vector4 in) -{ - return vector4(abs(in.x), abs(in.y), abs(in.z), abs(in.w)); -} - -vector4 ceil(vector4 in) -{ - return vector4(ceil(in.x), ceil(in.y), ceil(in.z), ceil(in.w)); -} - -vector4 floor(vector4 in) -{ - return vector4(floor(in.x), floor(in.y), floor(in.z), floor(in.w)); -} - -vector4 sqrt(vector4 in) -{ - return vector4(sqrt(in.x), sqrt(in.y), sqrt(in.z), sqrt(in.w)); -} - -vector4 exp(vector4 in) -{ - return vector4(exp(in.x), exp(in.y), exp(in.z), exp(in.w)); -} - -vector4 log(vector4 in) -{ - return vector4(log(in.x), log(in.y), log(in.z), log(in.w)); -} - -vector4 log2(vector4 in) -{ - return vector4(log2(in.x), log2(in.y), log2(in.z), log2(in.w)); -} - -vector4 mix(vector4 value1, vector4 value2, float x) -{ - return vector4(mix(value1.x, value2.x, x), - mix(value1.y, value2.y, x), - mix(value1.z, value2.z, x), - mix(value1.w, value2.w, x)); -} - -vector vec4ToVec3(vector4 v) -{ - return vector(v.x, v.y, v.z) / v.w; -} - -float dot(vector4 a, vector4 b) -{ - return ((a.x * b.x) + (a.y * b.y) + (a.z * b.z) + (a.w * b.w)); -} - -float length(vector4 a) -{ - return sqrt(a.x * a.x + a.y * a.y + a.z * a.z + a.w * a.w); -} - -vector4 smoothstep(vector4 low, vector4 high, vector4 in) -{ - return vector4(smoothstep(low.x, high.x, in.x), - smoothstep(low.y, high.y, in.y), - smoothstep(low.z, high.z, in.z), - 
smoothstep(low.w, high.w, in.w)); -} - -vector4 smoothstep(float low, float high, vector4 in) -{ - return vector4(smoothstep(low, high, in.x), - smoothstep(low, high, in.y), - smoothstep(low, high, in.z), - smoothstep(low, high, in.w)); -} - -vector4 clamp(vector4 in, vector4 low, vector4 high) -{ - return vector4(clamp(in.x, low.x, high.x), - clamp(in.y, low.y, high.y), - clamp(in.z, low.z, high.z), - clamp(in.w, low.w, high.w)); -} - -vector4 clamp(vector4 in, float low, float high) -{ - return vector4(clamp(in.x, low, high), - clamp(in.y, low, high), - clamp(in.z, low, high), - clamp(in.w, low, high)); -} - -vector4 max(vector4 a, vector4 b) -{ - return vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); -} - -vector4 max(vector4 a, float b) -{ - return max(a, vector4(b, b, b, b)); -} - -vector4 normalize(vector4 a) -{ - return a / length(a); -} - -vector4 min(vector4 a, vector4 b) -{ - return vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -} - -vector4 min(vector4 a, float b) -{ - return min(a, vector4(b, b, b, b)); -} - -vector4 fmod(vector4 a, vector4 b) -{ - return vector4(fmod(a.x, b.x), fmod(a.y, b.y), fmod(a.z, b.z), fmod(a.w, b.w)); -} - -vector4 fmod(vector4 a, float b) -{ - return fmod(a, vector4(b, b, b, b)); -} - -vector4 pow(vector4 in, vector4 amount) -{ - return vector4( - pow(in.x, amount.x), pow(in.y, amount.y), pow(in.z, amount.z), pow(in.w, amount.w)); -} - -vector4 pow(vector4 in, float amount) -{ - return vector4(pow(in.x, amount), pow(in.y, amount), pow(in.z, amount), pow(in.w, amount)); -} - -vector4 sign(vector4 a) -{ - return vector4(sign(a.x), sign(a.y), sign(a.z), sign(a.w)); -} - -vector4 sin(vector4 a) -{ - return vector4(sin(a.x), sin(a.y), sin(a.z), sin(a.w)); -} - -vector4 cos(vector4 a) -{ - return vector4(cos(a.x), cos(a.y), cos(a.z), cos(a.w)); -} - -vector4 tan(vector4 a) -{ - return vector4(tan(a.x), tan(a.y), tan(a.z), tan(a.w)); -} - -vector4 asin(vector4 a) -{ - return 
vector4(asin(a.x), asin(a.y), asin(a.z), asin(a.w)); -} - -vector4 acos(vector4 a) -{ - return vector4(acos(a.x), acos(a.y), acos(a.z), acos(a.w)); -} - -vector4 atan2(vector4 a, float f) -{ - return vector4(atan2(a.x, f), atan2(a.y, f), atan2(a.z, f), atan2(a.w, f)); -} - -vector4 atan2(vector4 a, vector4 b) -{ - return vector4(atan2(a.x, b.x), atan2(a.y, b.y), atan2(a.z, b.z), atan2(a.w, b.w)); -} - -vector4 transform(matrix M, vector4 p) -{ - return vector4(M[0][0] * p.x + M[0][1] * p.y + M[0][2] * p.z + M[0][2] * p.w, - M[1][0] * p.x + M[1][1] * p.y + M[1][2] * p.z + M[1][2] * p.w, - M[2][0] * p.x + M[2][1] * p.y + M[2][2] * p.z + M[2][2] * p.w, - M[3][0] * p.x + M[3][1] * p.y + M[3][2] * p.z + M[3][2] * p.w); -} - -vector4 transform(string fromspace, string tospace, vector4 p) -{ - return transform(matrix(fromspace, tospace), p); -} diff --git a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h new file mode 100644 index 00000000000..60ebf415970 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h @@ -0,0 +1,44 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg) +{ + int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) { + int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w; + int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w; + int buffer_offset = (kernel_split_params.tile.offset + x + + y * kernel_split_params.tile.stride) * + kernel_data.film.pass_stride; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples; + if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { + buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; + float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f, + buffer[kernel_data.film.pass_sample_count]); + if (sample_multiplier != 1.0f) { + kernel_adaptive_post_adjust(kg, buffer, sample_multiplier); + } + } + else { + kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f)); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h new file mode 100644 index 00000000000..93f41f7ced4 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg) +{ + int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (pixel_index < kernel_split_params.tile.h && + kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >= + kernel_data.integrator.adaptive_min_samples) { + int y = kernel_split_params.tile.y + pixel_index; + kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h new file mode 100644 index 00000000000..eca53d079ec --- /dev/null +++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg) +{ + int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (pixel_index < kernel_split_params.tile.w && + kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >= + kernel_data.integrator.adaptive_min_samples) { + int x = kernel_split_params.tile.x + pixel_index; + kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile); + } +} +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h new file mode 100644 index 00000000000..c8eb1ebd705 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_adaptive_stopping(KernelGlobals *kg) +{ + int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h && + kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >= + kernel_data.integrator.adaptive_min_samples) { + int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w; + int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w; + int buffer_offset = (kernel_split_params.tile.offset + x + + y * kernel_split_params.tile.stride) * + kernel_data.film.pass_stride; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + kernel_do_adaptive_stopping(kg, + buffer, + kernel_split_params.tile.start_sample + + kernel_split_params.tile.num_samples - 1); + } +} +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 45b839db05f..b24699ec39c 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -44,7 +44,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K branched_state->isect.t : FLT_MAX; - bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack); + float step_size = volume_stack_step_size(kg, branched_state->path_state.volume_stack); for (int j = branched_state->next_sample; j < num_samples; j++) { ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; @@ -61,7 +61,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( - kg, ps, sd, &volume_ray, L, tp, heterogeneous); + kg, ps, sd, &volume_ray, L, tp, step_size); # ifdef __VOLUME_SCATTER__ if (result == VOLUME_PATH_SCATTERED) { @@ 
-164,12 +164,12 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { # endif /* __BRANCHED_PATH__ */ - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + float step_size = volume_stack_step_size(kg, state->volume_stack); { /* integrate along volume segment with distance sampling */ VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, sd, &volume_ray, L, throughput, heterogeneous); + kg, state, sd, &volume_ray, L, throughput, step_size); # ifdef __VOLUME_SCATTER__ if (result == VOLUME_PATH_SCATTERED) { diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 384bc952460..5114f2b03e5 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -17,6 +17,7 @@ #ifndef __KERNEL_SPLIT_H__ #define __KERNEL_SPLIT_H__ +// clang-format off #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" @@ -52,6 +53,7 @@ #ifdef __BRANCHED_PATH__ # include "kernel/split/kernel_branched.h" #endif +// clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index 433b1221a37..decc537b39b 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -18,6 +18,7 @@ #define __KERNEL_SPLIT_DATA_H__ #include "kernel/split/kernel_split_data_types.h" + #include "kernel/kernel_globals.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index fd2833ee687..abeb8fa7457 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -161,52 +161,53 @@ CCL_NAMESPACE_END #include "svm_fractal_noise.h" #include "kernel/svm/svm_color_util.h" -#include "kernel/svm/svm_math_util.h" #include "kernel/svm/svm_mapping_util.h" +#include 
"kernel/svm/svm_math_util.h" #include "kernel/svm/svm_aov.h" #include "kernel/svm/svm_attribute.h" -#include "kernel/svm/svm_gradient.h" #include "kernel/svm/svm_blackbody.h" +#include "kernel/svm/svm_brick.h" +#include "kernel/svm/svm_brightness.h" +#include "kernel/svm/svm_bump.h" +#include "kernel/svm/svm_camera.h" +#include "kernel/svm/svm_checker.h" +#include "kernel/svm/svm_clamp.h" #include "kernel/svm/svm_closure.h" -#include "kernel/svm/svm_noisetex.h" #include "kernel/svm/svm_convert.h" #include "kernel/svm/svm_displace.h" #include "kernel/svm/svm_fresnel.h" -#include "kernel/svm/svm_wireframe.h" -#include "kernel/svm/svm_wavelength.h" -#include "kernel/svm/svm_camera.h" +#include "kernel/svm/svm_gamma.h" #include "kernel/svm/svm_geometry.h" +#include "kernel/svm/svm_gradient.h" #include "kernel/svm/svm_hsv.h" #include "kernel/svm/svm_ies.h" #include "kernel/svm/svm_image.h" -#include "kernel/svm/svm_gamma.h" -#include "kernel/svm/svm_brightness.h" #include "kernel/svm/svm_invert.h" #include "kernel/svm/svm_light_path.h" #include "kernel/svm/svm_magic.h" +#include "kernel/svm/svm_map_range.h" #include "kernel/svm/svm_mapping.h" -#include "kernel/svm/svm_normal.h" -#include "kernel/svm/svm_wave.h" #include "kernel/svm/svm_math.h" #include "kernel/svm/svm_mix.h" +#include "kernel/svm/svm_musgrave.h" +#include "kernel/svm/svm_noisetex.h" +#include "kernel/svm/svm_normal.h" #include "kernel/svm/svm_ramp.h" #include "kernel/svm/svm_sepcomb_hsv.h" #include "kernel/svm/svm_sepcomb_vector.h" -#include "kernel/svm/svm_musgrave.h" #include "kernel/svm/svm_sky.h" #include "kernel/svm/svm_tex_coord.h" #include "kernel/svm/svm_value.h" -#include "kernel/svm/svm_voronoi.h" -#include "kernel/svm/svm_checker.h" -#include "kernel/svm/svm_brick.h" +#include "kernel/svm/svm_vector_rotate.h" #include "kernel/svm/svm_vector_transform.h" +#include "kernel/svm/svm_vertex_color.h" +#include "kernel/svm/svm_voronoi.h" #include "kernel/svm/svm_voxel.h" -#include 
"kernel/svm/svm_bump.h" -#include "kernel/svm/svm_map_range.h" -#include "kernel/svm/svm_clamp.h" +#include "kernel/svm/svm_wave.h" +#include "kernel/svm/svm_wavelength.h" #include "kernel/svm/svm_white_noise.h" -#include "kernel/svm/svm_vertex_color.h" +#include "kernel/svm/svm_wireframe.h" #ifdef __SHADER_RAYTRACE__ # include "kernel/svm/svm_ao.h" @@ -230,6 +231,8 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, uint4 node = read_node(kg, &offset); switch (node.x) { + case NODE_END: + return; #if NODES_GROUP(NODE_GROUP_LEVEL_0) case NODE_SHADER_JUMP: { if (type == SHADER_TYPE_SURFACE) @@ -309,7 +312,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, svm_node_vector_displacement(kg, sd, stack, node, &offset); break; # endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ -# ifdef __TEXTURES__ case NODE_TEX_IMAGE: svm_node_tex_image(kg, sd, stack, node, &offset); break; @@ -319,9 +321,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_TEX_NOISE: svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset); break; -# endif /* __TEXTURES__ */ -# ifdef __EXTRA_NODES__ -# if NODES_FEATURE(NODE_FEATURE_BUMP) +# if NODES_FEATURE(NODE_FEATURE_BUMP) case NODE_SET_BUMP: svm_node_set_bump(kg, sd, stack, node); break; @@ -346,20 +346,19 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_CLOSURE_SET_NORMAL: svm_node_set_normal(kg, sd, stack, node.y, node.z); break; -# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE) +# if NODES_FEATURE(NODE_FEATURE_BUMP_STATE) case NODE_ENTER_BUMP_EVAL: svm_node_enter_bump_eval(kg, sd, stack, node.y); break; case NODE_LEAVE_BUMP_EVAL: svm_node_leave_bump_eval(kg, sd, stack, node.y); break; -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */ -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */ +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ case NODE_HSV: svm_node_hsv(kg, sd, stack, node, &offset); break; -# endif /* __EXTRA_NODES__ */ 
-#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */ #if NODES_GROUP(NODE_GROUP_LEVEL_1) case NODE_CLOSURE_HOLDOUT: @@ -379,7 +378,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset); break; # endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ -# ifdef __EXTRA_NODES__ case NODE_MATH: svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); break; @@ -404,15 +402,12 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_PARTICLE_INFO: svm_node_particle_info(kg, sd, stack, node.y, node.z); break; -# ifdef __HAIR__ -# if NODES_FEATURE(NODE_FEATURE_HAIR) +# if defined(__HAIR__) && NODES_FEATURE(NODE_FEATURE_HAIR) case NODE_HAIR_INFO: svm_node_hair_info(kg, sd, stack, node.y, node.z); break; -# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */ -# endif /* __HAIR__ */ -# endif /* __EXTRA_NODES__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */ +# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */ #if NODES_GROUP(NODE_GROUP_LEVEL_2) case NODE_TEXTURE_MAPPING: @@ -427,7 +422,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_CAMERA: svm_node_camera(kg, sd, stack, node.y, node.z, node.w); break; -# ifdef __TEXTURES__ case NODE_TEX_ENVIRONMENT: svm_node_tex_environment(kg, sd, stack, node); break; @@ -458,8 +452,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_TEX_WHITE_NOISE: svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset); break; -# endif /* __TEXTURES__ */ -# ifdef __EXTRA_NODES__ case NODE_NORMAL: svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); break; @@ -469,19 +461,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_IES: svm_node_ies(kg, sd, stack, node, &offset); break; - case NODE_AOV_START: - if (!svm_node_aov_check(state, buffer)) { - return; - } - break; - case NODE_AOV_COLOR: - 
svm_node_aov_color(kg, sd, stack, node, buffer); - break; - case NODE_AOV_VALUE: - svm_node_aov_value(kg, sd, stack, node, buffer); - break; -# endif /* __EXTRA_NODES__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */ #if NODES_GROUP(NODE_GROUP_LEVEL_3) case NODE_RGB_CURVES: @@ -494,7 +474,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_NORMAL_MAP: svm_node_normal_map(kg, sd, stack, node); break; -# ifdef __EXTRA_NODES__ case NODE_INVERT: svm_node_invert(sd, stack, node.y, node.z, node.w); break; @@ -513,6 +492,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_COMBINE_HSV: svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); break; + case NODE_VECTOR_ROTATE: + svm_node_vector_rotate(sd, stack, node.y, node.z, node.w); + break; case NODE_VECTOR_TRANSFORM: svm_node_vector_transform(kg, sd, stack, node); break; @@ -531,12 +513,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_CLAMP: svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset); break; -# endif /* __EXTRA_NODES__ */ -# if NODES_FEATURE(NODE_FEATURE_VOLUME) - case NODE_TEX_VOXEL: - svm_node_tex_voxel(kg, sd, stack, node, &offset); - break; -# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ # ifdef __SHADER_RAYTRACE__ case NODE_BEVEL: svm_node_bevel(kg, sd, state, stack, node); @@ -546,8 +522,25 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, break; # endif /* __SHADER_RAYTRACE__ */ #endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */ - case NODE_END: - return; + +#if NODES_GROUP(NODE_GROUP_LEVEL_4) +# if NODES_FEATURE(NODE_FEATURE_VOLUME) + case NODE_TEX_VOXEL: + svm_node_tex_voxel(kg, sd, stack, node, &offset); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ + case NODE_AOV_START: + if (!svm_node_aov_check(state, buffer)) { + return; + } + break; + case NODE_AOV_COLOR: + svm_node_aov_color(kg, sd, stack, node, buffer); + break; + case NODE_AOV_VALUE: 
+ svm_node_aov_value(kg, sd, stack, node, buffer); + break; +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_4) */ default: kernel_assert(!"Unknown node type was passed to the SVM machine"); return; diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index bf2d3f4fbff..cb1b521c585 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -16,23 +16,6 @@ CCL_NAMESPACE_BEGIN -/* Hair Melanin */ - -ccl_device_inline float3 sigma_from_concentration(float eumelanin, float pheomelanin) -{ - return eumelanin * make_float3(0.506f, 0.841f, 1.653f) + - pheomelanin * make_float3(0.343f, 0.733f, 1.924f); -} - -ccl_device_inline float3 sigma_from_reflectance(float3 color, float azimuthal_roughness) -{ - float x = azimuthal_roughness; - float roughness_fac = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + - 5.969f; - float3 sigma = log3(color) / roughness_fac; - return sigma * sigma; -} - /* Closure Nodes */ ccl_device void svm_node_glass_setup( @@ -868,24 +851,26 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, /* Benedikt Bitterli's melanin ratio remapping. */ float eumelanin = melanin * (1.0f - melanin_redness); float pheomelanin = melanin * melanin_redness; - float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin); + float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin, + pheomelanin); /* Optional tint. 
*/ float3 tint = stack_load_float3(stack, tint_ofs); - float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness); + float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint, + radial_roughness); bsdf->sigma = melanin_sigma + tint_sigma; break; } case NODE_PRINCIPLED_HAIR_REFLECTANCE: { float3 color = stack_load_float3(stack, color_ofs); - bsdf->sigma = sigma_from_reflectance(color, radial_roughness); + bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness); break; } default: { /* Fallback to brownish hair, same as defaults for melanin. */ kernel_assert(!"Invalid Principled Hair parametrization!"); - bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f); + bsdf->sigma = bsdf_principled_hair_sigma_from_concentration(0.0f, 0.8054375f); break; } } diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 90f1a7845c7..f57c85fc23e 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,8 +16,6 @@ CCL_NAMESPACE_BEGIN -#ifdef __TEXTURES__ - ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags) { if (id == -1) { @@ -30,10 +28,6 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, if ((flags & NODE_IMAGE_ALPHA_UNASSOCIATE) && alpha != 1.0f && alpha != 0.0f) { r /= alpha; - const int texture_type = kernel_tex_type(id); - if (texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) { - r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f)); - } r.w = alpha; } @@ -250,6 +244,4 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg, stack_store_float(stack, alpha_offset, f.w); } -#endif /* __TEXTURES__ */ - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index 82cae7bbacf..01e01c399ea 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -51,11 +51,19 @@ 
ccl_device void svm_node_vector_math(KernelGlobals *kg, float3 a = stack_load_float3(stack, a_stack_offset); float3 b = stack_load_float3(stack, b_stack_offset); + float3 c; float scale = stack_load_float(stack, scale_stack_offset); float value; float3 vector; - svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, scale); + + /* 3 Vector Operators */ + if (type == NODE_VECTOR_MATH_WRAP) { + uint4 extra_node = read_node(kg, offset); + c = stack_load_float3(stack, extra_node.x); + } + + svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, c, scale); if (stack_valid(value_stack_offset)) stack_store_float(stack, value_stack_offset, value); diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index 7b9eaaeb710..d1e1fa87e53 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -16,8 +16,13 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_vector_math( - float *value, float3 *vector, NodeVectorMathType type, float3 a, float3 b, float scale) +ccl_device void svm_vector_math(float *value, + float3 *vector, + NodeVectorMathType type, + float3 a, + float3 b, + float3 c, + float scale) { switch (type) { case NODE_VECTOR_MATH_ADD: @@ -68,6 +73,9 @@ ccl_device void svm_vector_math( case NODE_VECTOR_MATH_MODULO: *vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z)); break; + case NODE_VECTOR_MATH_WRAP: + *vector = make_float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z)); + break; case NODE_VECTOR_MATH_FRACTION: *vector = a - floor(a); break; @@ -80,6 +88,15 @@ ccl_device void svm_vector_math( case NODE_VECTOR_MATH_MAXIMUM: *vector = max(a, b); break; + case NODE_VECTOR_MATH_SINE: + *vector = make_float3(sinf(a.x), sinf(a.y), sinf(a.z)); + break; + case NODE_VECTOR_MATH_COSINE: + *vector = make_float3(cosf(a.x), cosf(a.y), cosf(a.z)); + break; + case NODE_VECTOR_MATH_TANGENT: + *vector = make_float3(tanf(a.x), 
tanf(a.y), tanf(a.z)); + break; default: *vector = make_float3(0.0f, 0.0f, 0.0f); *value = 0.0f; diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 8dbb147e76a..85ede7770e9 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -42,7 +42,8 @@ CCL_NAMESPACE_BEGIN #define NODE_GROUP_LEVEL_1 1 #define NODE_GROUP_LEVEL_2 2 #define NODE_GROUP_LEVEL_3 3 -#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3 +#define NODE_GROUP_LEVEL_4 4 +#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_4 #define NODE_FEATURE_VOLUME (1 << 0) #define NODE_FEATURE_HAIR (1 << 1) @@ -62,97 +63,98 @@ CCL_NAMESPACE_BEGIN typedef enum ShaderNodeType { NODE_END = 0, + NODE_SHADER_JUMP, NODE_CLOSURE_BSDF, NODE_CLOSURE_EMISSION, NODE_CLOSURE_BACKGROUND, NODE_CLOSURE_SET_WEIGHT, NODE_CLOSURE_WEIGHT, + NODE_EMISSION_WEIGHT, NODE_MIX_CLOSURE, NODE_JUMP_IF_ZERO, NODE_JUMP_IF_ONE, - NODE_TEX_IMAGE, - NODE_TEX_IMAGE_BOX, - NODE_TEX_SKY, NODE_GEOMETRY, - NODE_GEOMETRY_DUPLI, - NODE_LIGHT_PATH, + NODE_CONVERT, + NODE_TEX_COORD, NODE_VALUE_F, NODE_VALUE_V, - NODE_MIX, NODE_ATTR, - NODE_CONVERT, - NODE_FRESNEL, - NODE_WIREFRAME, - NODE_WAVELENGTH, - NODE_BLACKBODY, - NODE_EMISSION_WEIGHT, - NODE_TEX_GRADIENT, - NODE_TEX_VORONOI, - NODE_TEX_MUSGRAVE, - NODE_TEX_WAVE, - NODE_TEX_MAGIC, - NODE_TEX_NOISE, - NODE_SHADER_JUMP, - NODE_SET_DISPLACEMENT, + NODE_VERTEX_COLOR, NODE_GEOMETRY_BUMP_DX, NODE_GEOMETRY_BUMP_DY, + NODE_SET_DISPLACEMENT, + NODE_DISPLACEMENT, + NODE_VECTOR_DISPLACEMENT, + NODE_TEX_IMAGE, + NODE_TEX_IMAGE_BOX, + NODE_TEX_NOISE, NODE_SET_BUMP, - NODE_MATH, - NODE_VECTOR_MATH, - NODE_VECTOR_TRANSFORM, - NODE_MAPPING, - NODE_TEX_COORD, - NODE_TEX_COORD_BUMP_DX, - NODE_TEX_COORD_BUMP_DY, NODE_ATTR_BUMP_DX, NODE_ATTR_BUMP_DY, - NODE_TEX_ENVIRONMENT, + NODE_VERTEX_COLOR_BUMP_DX, + NODE_VERTEX_COLOR_BUMP_DY, + NODE_TEX_COORD_BUMP_DX, + NODE_TEX_COORD_BUMP_DY, + NODE_CLOSURE_SET_NORMAL, + NODE_ENTER_BUMP_EVAL, + 
NODE_LEAVE_BUMP_EVAL, + NODE_HSV, NODE_CLOSURE_HOLDOUT, + NODE_FRESNEL, NODE_LAYER_WEIGHT, NODE_CLOSURE_VOLUME, - NODE_SEPARATE_VECTOR, - NODE_COMBINE_VECTOR, - NODE_SEPARATE_HSV, - NODE_COMBINE_HSV, - NODE_HSV, - NODE_CAMERA, - NODE_INVERT, - NODE_NORMAL, + NODE_PRINCIPLED_VOLUME, + NODE_MATH, + NODE_VECTOR_MATH, + NODE_RGB_RAMP, NODE_GAMMA, - NODE_TEX_CHECKER, NODE_BRIGHTCONTRAST, - NODE_RGB_RAMP, - NODE_RGB_CURVES, - NODE_VECTOR_CURVES, - NODE_MIN_MAX, - NODE_LIGHT_FALLOFF, + NODE_LIGHT_PATH, NODE_OBJECT_INFO, NODE_PARTICLE_INFO, + NODE_HAIR_INFO, + NODE_TEXTURE_MAPPING, + NODE_MAPPING, + NODE_MIN_MAX, + NODE_CAMERA, + NODE_TEX_ENVIRONMENT, + NODE_TEX_SKY, + NODE_TEX_GRADIENT, + NODE_TEX_VORONOI, + NODE_TEX_MUSGRAVE, + NODE_TEX_WAVE, + NODE_TEX_MAGIC, + NODE_TEX_CHECKER, NODE_TEX_BRICK, - NODE_CLOSURE_SET_NORMAL, - NODE_AMBIENT_OCCLUSION, + NODE_TEX_WHITE_NOISE, + NODE_NORMAL, + NODE_LIGHT_FALLOFF, + NODE_IES, + NODE_RGB_CURVES, + NODE_VECTOR_CURVES, NODE_TANGENT, NODE_NORMAL_MAP, - NODE_HAIR_INFO, - NODE_UVMAP, - NODE_TEX_VOXEL, - NODE_ENTER_BUMP_EVAL, - NODE_LEAVE_BUMP_EVAL, - NODE_BEVEL, - NODE_DISPLACEMENT, - NODE_VECTOR_DISPLACEMENT, - NODE_PRINCIPLED_VOLUME, - NODE_IES, + NODE_INVERT, + NODE_MIX, + NODE_SEPARATE_VECTOR, + NODE_COMBINE_VECTOR, + NODE_SEPARATE_HSV, + NODE_COMBINE_HSV, + NODE_VECTOR_ROTATE, + NODE_VECTOR_TRANSFORM, + NODE_WIREFRAME, + NODE_WAVELENGTH, + NODE_BLACKBODY, NODE_MAP_RANGE, NODE_CLAMP, - NODE_TEXTURE_MAPPING, - NODE_TEX_WHITE_NOISE, - NODE_VERTEX_COLOR, - NODE_VERTEX_COLOR_BUMP_DX, - NODE_VERTEX_COLOR_BUMP_DY, + NODE_BEVEL, + NODE_AMBIENT_OCCLUSION, + NODE_TEX_VOXEL, NODE_AOV_START, - NODE_AOV_VALUE, NODE_AOV_COLOR, + NODE_AOV_VALUE, + /* NOTE: for best OpenCL performance, item definition in the enum must + * match the switch case order in svm.h. 
*/ } ShaderNodeType; typedef enum NodeAttributeType { @@ -326,6 +328,10 @@ typedef enum NodeVectorMathType { NODE_VECTOR_MATH_ABSOLUTE, NODE_VECTOR_MATH_MINIMUM, NODE_VECTOR_MATH_MAXIMUM, + NODE_VECTOR_MATH_WRAP, + NODE_VECTOR_MATH_SINE, + NODE_VECTOR_MATH_COSINE, + NODE_VECTOR_MATH_TANGENT, } NodeVectorMathType; typedef enum NodeClampType { @@ -347,6 +353,14 @@ typedef enum NodeMappingType { NODE_MAPPING_TYPE_NORMAL } NodeMappingType; +typedef enum NodeVectorRotateType { + NODE_VECTOR_ROTATE_TYPE_AXIS, + NODE_VECTOR_ROTATE_TYPE_AXIS_X, + NODE_VECTOR_ROTATE_TYPE_AXIS_Y, + NODE_VECTOR_ROTATE_TYPE_AXIS_Z, + NODE_VECTOR_ROTATE_TYPE_EULER_XYZ, +} NodeVectorRotateType; + typedef enum NodeVectorTransformType { NODE_VECTOR_TRANSFORM_TYPE_VECTOR, NODE_VECTOR_TRANSFORM_TYPE_POINT, @@ -380,9 +394,24 @@ typedef enum NodeMusgraveType { typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType; -typedef enum NodeWaveProfiles { +typedef enum NodeWaveBandsDirection { + NODE_WAVE_BANDS_DIRECTION_X, + NODE_WAVE_BANDS_DIRECTION_Y, + NODE_WAVE_BANDS_DIRECTION_Z, + NODE_WAVE_BANDS_DIRECTION_DIAGONAL +} NodeWaveBandsDirection; + +typedef enum NodeWaveRingsDirection { + NODE_WAVE_RINGS_DIRECTION_X, + NODE_WAVE_RINGS_DIRECTION_Y, + NODE_WAVE_RINGS_DIRECTION_Z, + NODE_WAVE_RINGS_DIRECTION_SPHERICAL +} NodeWaveRingsDirection; + +typedef enum NodeWaveProfile { NODE_WAVE_PROFILE_SIN, NODE_WAVE_PROFILE_SAW, + NODE_WAVE_PROFILE_TRI, } NodeWaveProfile; typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType; @@ -499,6 +528,7 @@ typedef enum ClosureType { CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID, CLOSURE_BSDF_PRINCIPLED_SHEEN_ID, CLOSURE_BSDF_DIFFUSE_TOON_ID, + CLOSURE_BSDF_TRANSLUCENT_ID, /* Glossy */ CLOSURE_BSDF_REFLECTION_ID, @@ -521,7 +551,6 @@ typedef enum ClosureType { CLOSURE_BSDF_HAIR_REFLECTION_ID, /* Transmission */ - CLOSURE_BSDF_TRANSLUCENT_ID, CLOSURE_BSDF_REFRACTION_ID, CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID, 
CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID, @@ -562,12 +591,12 @@ typedef enum ClosureType { /* watch this, being lazy with memory usage */ #define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID) #define CLOSURE_IS_BSDF_DIFFUSE(type) \ - (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID) + (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_TRANSLUCENT_ID) #define CLOSURE_IS_BSDF_GLOSSY(type) \ ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \ (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID)) #define CLOSURE_IS_BSDF_TRANSMISSION(type) \ - (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) + (type >= CLOSURE_BSDF_REFRACTION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) #define CLOSURE_IS_BSDF_BSSRDF(type) \ (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID) #define CLOSURE_IS_BSDF_SINGULAR(type) \ diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h new file mode 100644 index 00000000000..79a4ec2c40e --- /dev/null +++ b/intern/cycles/kernel/svm/svm_vector_rotate.h @@ -0,0 +1,78 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +/* Vector Rotate */ + +ccl_device void svm_node_vector_rotate(ShaderData *sd, + float *stack, + uint input_stack_offsets, + uint axis_stack_offsets, + uint result_stack_offset) +{ + uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset, + angle_stack_offset, invert; + + svm_unpack_node_uchar4( + input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset, &invert); + svm_unpack_node_uchar3( + axis_stack_offsets, &center_stack_offset, &axis_stack_offset, &angle_stack_offset); + + if (stack_valid(result_stack_offset)) { + + float3 vector = stack_load_float3(stack, vector_stack_offset); + float3 center = stack_load_float3(stack, center_stack_offset); + float3 result = make_float3(0.0f, 0.0f, 0.0f); + + if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) { + float3 rotation = stack_load_float3(stack, rotation_stack_offset); // Default XYZ. + Transform rotationTransform = euler_to_transform(rotation); + if (invert) { + result = transform_direction_transposed(&rotationTransform, vector - center) + center; + } + else { + result = transform_direction(&rotationTransform, vector - center) + center; + } + } + else { + float3 axis; + switch (type) { + case NODE_VECTOR_ROTATE_TYPE_AXIS_X: + axis = make_float3(1.0f, 0.0f, 0.0f); + break; + case NODE_VECTOR_ROTATE_TYPE_AXIS_Y: + axis = make_float3(0.0f, 1.0f, 0.0f); + break; + case NODE_VECTOR_ROTATE_TYPE_AXIS_Z: + axis = make_float3(0.0f, 0.0f, 1.0f); + break; + default: + axis = normalize(stack_load_float3(stack, axis_stack_offset)); + break; + } + float angle = stack_load_float(stack, angle_stack_offset); + angle = invert ? -angle : angle; + result = (len_squared(axis) != 0.0f) ? 
+ rotate_around_axis(vector - center, axis, angle) + center : + vector; + } + + stack_store_float3(stack, result_stack_offset, result); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index b79be8e5bde..4bc14f82382 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -39,7 +39,7 @@ ccl_device void svm_node_tex_voxel( co = transform_point(&tfm, co); } - float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE); + float4 r = kernel_tex_image_interp_3d(kg, id, co, INTERPOLATION_NONE); #else float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #endif diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h index 50c868c0f82..64102535f7d 100644 --- a/intern/cycles/kernel/svm/svm_wave.h +++ b/intern/cycles/kernel/svm/svm_wave.h @@ -19,52 +19,101 @@ CCL_NAMESPACE_BEGIN /* Wave */ ccl_device_noinline_cpu float svm_wave(NodeWaveType type, + NodeWaveBandsDirection bands_dir, + NodeWaveRingsDirection rings_dir, NodeWaveProfile profile, float3 p, float detail, float distortion, - float dscale) + float dscale, + float phase) { + /* Prevent precision issues on unit coordinates. 
*/ + p = (p + 0.000001f) * 0.999999f; + float n; - if (type == NODE_WAVE_BANDS) - n = (p.x + p.y + p.z) * 10.0f; - else /* NODE_WAVE_RINGS */ - n = len(p) * 20.0f; + if (type == NODE_WAVE_BANDS) { + if (bands_dir == NODE_WAVE_BANDS_DIRECTION_X) { + n = p.x * 20.0f; + } + else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Y) { + n = p.y * 20.0f; + } + else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Z) { + n = p.z * 20.0f; + } + else { /* NODE_WAVE_BANDS_DIRECTION_DIAGONAL */ + n = (p.x + p.y + p.z) * 10.0f; + } + } + else { /* NODE_WAVE_RINGS */ + float3 rp = p; + if (rings_dir == NODE_WAVE_RINGS_DIRECTION_X) { + rp *= make_float3(0.0f, 1.0f, 1.0f); + } + else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Y) { + rp *= make_float3(1.0f, 0.0f, 1.0f); + } + else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Z) { + rp *= make_float3(1.0f, 1.0f, 0.0f); + } + /* else: NODE_WAVE_RINGS_DIRECTION_SPHERICAL */ + + n = len(rp) * 20.0f; + } + + n += phase; if (distortion != 0.0f) n += distortion * (fractal_noise_3d(p * dscale, detail) * 2.0f - 1.0f); if (profile == NODE_WAVE_PROFILE_SIN) { - return 0.5f + 0.5f * sinf(n); + return 0.5f + 0.5f * sinf(n - M_PI_2_F); + } + else if (profile == NODE_WAVE_PROFILE_SAW) { + n /= M_2PI_F; + return n - floorf(n); } - else { /* NODE_WAVE_PROFILE_SAW */ + else { /* NODE_WAVE_PROFILE_TRI */ n /= M_2PI_F; - n -= (int)n; - return (n < 0.0f) ? 
n + 1.0f : n; + return fabsf(n - floorf(n + 0.5f)) * 2.0f; } } ccl_device void svm_node_tex_wave( KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint4 node2 = read_node(kg, offset); + uint4 defaults1 = read_node(kg, offset); + uint4 defaults2 = read_node(kg, offset); - uint type; - uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset, - fac_offset; + /* RNA properties */ + uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset; + /* Inputs, Outputs */ + uint co_offset, scale_offset, distortion_offset, detail_offset, dscale_offset, phase_offset; + uint color_offset, fac_offset; - svm_unpack_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset); - svm_unpack_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset); + svm_unpack_node_uchar4( + node.y, &type_offset, &bands_dir_offset, &rings_dir_offset, &profile_offset); + svm_unpack_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, &detail_offset); + svm_unpack_node_uchar4(node.w, &dscale_offset, &phase_offset, &color_offset, &fac_offset); float3 co = stack_load_float3(stack, co_offset); - float scale = stack_load_float_default(stack, scale_offset, node2.x); - float detail = stack_load_float_default(stack, detail_offset, node2.y); - float distortion = stack_load_float_default(stack, distortion_offset, node2.z); - float dscale = stack_load_float_default(stack, dscale_offset, node2.w); + float scale = stack_load_float_default(stack, scale_offset, defaults1.x); + float detail = stack_load_float_default(stack, detail_offset, defaults1.y); + float distortion = stack_load_float_default(stack, distortion_offset, defaults1.z); + float dscale = stack_load_float_default(stack, dscale_offset, defaults1.w); + float phase = stack_load_float_default(stack, phase_offset, defaults2.x); - float f = svm_wave( - (NodeWaveType)type, (NodeWaveProfile)node.w, co * scale, detail, distortion, 
dscale); + float f = svm_wave((NodeWaveType)type_offset, + (NodeWaveBandsDirection)bands_dir_offset, + (NodeWaveRingsDirection)rings_dir_offset, + (NodeWaveProfile)profile_offset, + co * scale, + detail, + distortion, + dscale, + phase); if (stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f); diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 92578b888a6..472b5a0c101 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -19,9 +19,14 @@ set(SRC coverage.cpp denoising.cpp film.cpp + geometry.cpp graph.cpp + hair.cpp image.cpp + image_oiio.cpp + image_vdb.cpp integrator.cpp + jitter.cpp light.cpp merge.cpp mesh.cpp @@ -54,10 +59,15 @@ set(SRC_HEADERS coverage.h denoising.h film.h + geometry.h graph.h + hair.h image.h + image_oiio.h + image_vdb.h integrator.h light.h + jitter.h merge.h mesh.h nodes.h @@ -86,6 +96,29 @@ if(WITH_CYCLES_OSL) list(APPEND LIB cycles_kernel_osl ) + + SET_PROPERTY(SOURCE osl.cpp PROPERTY COMPILE_FLAGS ${RTTI_DISABLE_FLAGS}) +endif() + +if(WITH_OPENCOLORIO) + add_definitions(-DWITH_OCIO) + include_directories( + SYSTEM + ${OPENCOLORIO_INCLUDE_DIRS} + ) + if(WIN32) + add_definitions(-DOpenColorIO_STATIC) + endif() +endif() + +if(WITH_OPENVDB) + add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS}) + list(APPEND INC_SYS + ${OPENVDB_INCLUDE_DIRS} + ) + list(APPEND LIB + ${OPENVDB_LIBRARIES} + ) endif() include_directories(${INC}) diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp index b65c2faa788..4c26d5e8365 100644 --- a/intern/cycles/render/attribute.cpp +++ b/intern/cycles/render/attribute.cpp @@ -14,9 +14,10 @@ * limitations under the License. 
*/ +#include "render/attribute.h" +#include "render/hair.h" #include "render/image.h" #include "render/mesh.h" -#include "render/attribute.h" #include "util/util_foreach.h" #include "util/util_transform.h" @@ -25,46 +26,51 @@ CCL_NAMESPACE_BEGIN /* Attribute */ -Attribute::~Attribute() -{ - /* for voxel data, we need to remove the image from the image manager */ - if (element == ATTR_ELEMENT_VOXEL) { - VoxelAttribute *voxel_data = data_voxel(); - - if (voxel_data && voxel_data->slot != -1) { - voxel_data->manager->remove_image(voxel_data->slot); - } - } -} - -void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_) +Attribute::Attribute( + ustring name, TypeDesc type, AttributeElement element, Geometry *geom, AttributePrimitive prim) + : name(name), std(ATTR_STD_NONE), type(type), element(element), flags(0) { - name = name_; - type = type_; - element = element_; - std = ATTR_STD_NONE; - flags = 0; - /* string and matrix not supported! */ assert(type == TypeDesc::TypeFloat || type == TypeDesc::TypeColor || type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || type == TypeDesc::TypeNormal || type == TypeDesc::TypeMatrix || type == TypeFloat2 || type == TypeRGBA); + + if (element == ATTR_ELEMENT_VOXEL) { + buffer.resize(sizeof(ImageHandle)); + new (buffer.data()) ImageHandle(); + } + else { + resize(geom, prim, false); + } } -void Attribute::resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only) +Attribute::~Attribute() { - if (reserve_only) { - buffer.reserve(buffer_size(mesh, prim)); + /* For voxel data, we need to free the image handle. 
*/ + if (element == ATTR_ELEMENT_VOXEL && buffer.size()) { + ImageHandle &handle = data_voxel(); + handle.~ImageHandle(); } - else { - buffer.resize(buffer_size(mesh, prim), 0); +} + +void Attribute::resize(Geometry *geom, AttributePrimitive prim, bool reserve_only) +{ + if (element != ATTR_ELEMENT_VOXEL) { + if (reserve_only) { + buffer.reserve(buffer_size(geom, prim)); + } + else { + buffer.resize(buffer_size(geom, prim), 0); + } } } void Attribute::resize(size_t num_elements) { - buffer.resize(num_elements * data_sizeof(), 0); + if (element != ATTR_ELEMENT_VOXEL) { + buffer.resize(num_elements * data_sizeof(), 0); + } } void Attribute::add(const float &f) @@ -122,17 +128,6 @@ void Attribute::add(const Transform &f) buffer.push_back(data[i]); } -void Attribute::add(const VoxelAttribute &f) -{ - assert(data_sizeof() == sizeof(VoxelAttribute)); - - char *data = (char *)&f; - size_t size = sizeof(f); - - for (size_t i = 0; i < size; i++) - buffer.push_back(data[i]); -} - void Attribute::add(const char *data) { size_t size = data_sizeof(); @@ -144,7 +139,7 @@ void Attribute::add(const char *data) size_t Attribute::data_sizeof() const { if (element == ATTR_ELEMENT_VOXEL) - return sizeof(VoxelAttribute); + return sizeof(ImageHandle); else if (element == ATTR_ELEMENT_CORNER_BYTE) return sizeof(uchar4); else if (type == TypeDesc::TypeFloat) @@ -157,13 +152,13 @@ size_t Attribute::data_sizeof() const return sizeof(float3); } -size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const +size_t Attribute::element_size(Geometry *geom, AttributePrimitive prim) const { if (flags & ATTR_FINAL_SIZE) { return buffer.size() / data_sizeof(); } - size_t size; + size_t size = 0; switch (element) { case ATTR_ELEMENT_OBJECT: @@ -172,54 +167,74 @@ size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const size = 1; break; case ATTR_ELEMENT_VERTEX: - size = mesh->verts.size() + mesh->num_ngons; - if (prim == ATTR_PRIM_SUBD) { - size -= mesh->num_subd_verts; + 
if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + size = mesh->verts.size() + mesh->num_ngons; + if (prim == ATTR_PRIM_SUBD) { + size -= mesh->num_subd_verts; + } } break; case ATTR_ELEMENT_VERTEX_MOTION: - size = (mesh->verts.size() + mesh->num_ngons) * (mesh->motion_steps - 1); - if (prim == ATTR_PRIM_SUBD) { - size -= mesh->num_subd_verts * (mesh->motion_steps - 1); + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + size = (mesh->verts.size() + mesh->num_ngons) * (mesh->motion_steps - 1); + if (prim == ATTR_PRIM_SUBD) { + size -= mesh->num_subd_verts * (mesh->motion_steps - 1); + } } break; case ATTR_ELEMENT_FACE: - if (prim == ATTR_PRIM_TRIANGLE) { - size = mesh->num_triangles(); - } - else { - size = mesh->subd_faces.size() + mesh->num_ngons; + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (prim == ATTR_PRIM_GEOMETRY) { + size = mesh->num_triangles(); + } + else { + size = mesh->subd_faces.size() + mesh->num_ngons; + } } break; case ATTR_ELEMENT_CORNER: case ATTR_ELEMENT_CORNER_BYTE: - if (prim == ATTR_PRIM_TRIANGLE) { - size = mesh->num_triangles() * 3; - } - else { - size = mesh->subd_face_corners.size() + mesh->num_ngons; + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (prim == ATTR_PRIM_GEOMETRY) { + size = mesh->num_triangles() * 3; + } + else { + size = mesh->subd_face_corners.size() + mesh->num_ngons; + } } break; case ATTR_ELEMENT_CURVE: - size = mesh->num_curves(); + if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + size = hair->num_curves(); + } break; case ATTR_ELEMENT_CURVE_KEY: - size = mesh->curve_keys.size(); + if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + size = hair->curve_keys.size(); + } break; case ATTR_ELEMENT_CURVE_KEY_MOTION: - size = mesh->curve_keys.size() * (mesh->motion_steps - 1); + if (geom->type == Geometry::HAIR) { + Hair *hair = 
static_cast<Hair *>(geom); + size = hair->curve_keys.size() * (hair->motion_steps - 1); + } break; default: - size = 0; break; } return size; } -size_t Attribute::buffer_size(Mesh *mesh, AttributePrimitive prim) const +size_t Attribute::buffer_size(Geometry *geom, AttributePrimitive prim) const { - return element_size(mesh, prim) * data_sizeof(); + return element_size(geom, prim) * data_sizeof(); } bool Attribute::same_storage(TypeDesc a, TypeDesc b) @@ -280,6 +295,8 @@ const char *Attribute::standard_name(AttributeStandard std) return "tangent"; case ATTR_STD_UV_TANGENT_SIGN: return "tangent_sign"; + case ATTR_STD_VERTEX_COLOR: + return "vertex_color"; case ATTR_STD_POSITION_UNDEFORMED: return "undeformed"; case ATTR_STD_POSITION_UNDISPLACED: @@ -336,13 +353,42 @@ AttributeStandard Attribute::name_standard(const char *name) return ATTR_STD_NONE; } +void Attribute::get_uv_tiles(Geometry *geom, + AttributePrimitive prim, + unordered_set<int> &tiles) const +{ + if (type != TypeFloat2) { + return; + } + + const int num = element_size(geom, prim); + const float2 *uv = data_float2(); + for (int i = 0; i < num; i++, uv++) { + float u = uv->x, v = uv->y; + int x = (int)u, y = (int)v; + + if (x < 0 || y < 0 || x >= 10) { + continue; + } + + /* Be conservative in corners - precisely touching the right or upper edge of a tile + * should not load its right/upper neighbor as well. 
*/ + if (x > 0 && (u < x + 1e-6f)) { + x--; + } + if (y > 0 && (v < y + 1e-6f)) { + y--; + } + + tiles.insert(1001 + 10 * y + x); + } +} + /* Attribute Set */ -AttributeSet::AttributeSet() +AttributeSet::AttributeSet(Geometry *geometry, AttributePrimitive prim) + : geometry(geometry), prim(prim) { - triangle_mesh = NULL; - curve_mesh = NULL; - subd_mesh = NULL; } AttributeSet::~AttributeSet() @@ -362,28 +408,9 @@ Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement eleme remove(name); } -#if __cplusplus >= 201103L - attributes.emplace_back(); - attr = &attributes.back(); - attr->set(name, type, element); -#else - { - Attribute attr_temp; - attr_temp.set(name, type, element); - attributes.push_back(attr_temp); - attr = &attributes.back(); - } -#endif - - /* this is weak .. */ - if (triangle_mesh) - attr->resize(triangle_mesh, ATTR_PRIM_TRIANGLE, false); - if (curve_mesh) - attr->resize(curve_mesh, ATTR_PRIM_CURVE, false); - if (subd_mesh) - attr->resize(subd_mesh, ATTR_PRIM_SUBD, false); - - return attr; + Attribute new_attr(name, type, element, geometry, prim); + attributes.emplace_back(std::move(new_attr)); + return &attributes.back(); } Attribute *AttributeSet::find(ustring name) const @@ -418,7 +445,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) if (name == ustring()) name = Attribute::standard_name(std); - if (triangle_mesh || subd_mesh) { + if (geometry->type == Geometry::MESH) { switch (std) { case ATTR_STD_VERTEX_NORMAL: attr = add(name, TypeDesc::TypeNormal, ATTR_ELEMENT_VERTEX); @@ -435,6 +462,9 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) case ATTR_STD_UV_TANGENT_SIGN: attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CORNER); break; + case ATTR_STD_VERTEX_COLOR: + attr = add(name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + break; case ATTR_STD_GENERATED: case ATTR_STD_POSITION_UNDEFORMED: case ATTR_STD_POSITION_UNDISPLACED: @@ -478,7 +508,7 @@ Attribute 
*AttributeSet::add(AttributeStandard std, ustring name) break; } } - else if (curve_mesh) { + else if (geometry->type == Geometry::HAIR) { switch (std) { case ATTR_STD_UV: attr = add(name, TypeFloat2, ATTR_ELEMENT_CURVE); @@ -561,12 +591,7 @@ void AttributeSet::remove(Attribute *attribute) void AttributeSet::resize(bool reserve_only) { foreach (Attribute &attr, attributes) { - if (triangle_mesh) - attr.resize(triangle_mesh, ATTR_PRIM_TRIANGLE, reserve_only); - if (curve_mesh) - attr.resize(curve_mesh, ATTR_PRIM_CURVE, reserve_only); - if (subd_mesh) - attr.resize(subd_mesh, ATTR_PRIM_SUBD, reserve_only); + attr.resize(geometry, prim, reserve_only); } } @@ -596,15 +621,10 @@ AttributeRequest::AttributeRequest(ustring name_) name = name_; std = ATTR_STD_NONE; - triangle_type = TypeDesc::TypeFloat; - triangle_desc.element = ATTR_ELEMENT_NONE; - triangle_desc.offset = 0; - triangle_desc.type = NODE_ATTR_FLOAT; - - curve_type = TypeDesc::TypeFloat; - curve_desc.element = ATTR_ELEMENT_NONE; - curve_desc.offset = 0; - curve_desc.type = NODE_ATTR_FLOAT; + type = TypeDesc::TypeFloat; + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + desc.type = NODE_ATTR_FLOAT; subd_type = TypeDesc::TypeFloat; subd_desc.element = ATTR_ELEMENT_NONE; @@ -617,15 +637,10 @@ AttributeRequest::AttributeRequest(AttributeStandard std_) name = ustring(); std = std_; - triangle_type = TypeDesc::TypeFloat; - triangle_desc.element = ATTR_ELEMENT_NONE; - triangle_desc.offset = 0; - triangle_desc.type = NODE_ATTR_FLOAT; - - curve_type = TypeDesc::TypeFloat; - curve_desc.element = ATTR_ELEMENT_NONE; - curve_desc.offset = 0; - curve_desc.type = NODE_ATTR_FLOAT; + type = TypeDesc::TypeFloat; + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + desc.type = NODE_ATTR_FLOAT; subd_type = TypeDesc::TypeFloat; subd_desc.element = ATTR_ELEMENT_NONE; diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h index ebab0fe7f88..5871fa04a31 100644 --- 
a/intern/cycles/render/attribute.h +++ b/intern/cycles/render/attribute.h @@ -17,10 +17,13 @@ #ifndef __ATTRIBUTE_H__ #define __ATTRIBUTE_H__ +#include "render/image.h" + #include "kernel/kernel_types.h" #include "util/util_list.h" #include "util/util_param.h" +#include "util/util_set.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -30,17 +33,12 @@ class Attribute; class AttributeRequest; class AttributeRequestSet; class AttributeSet; -class ImageManager; +class ImageHandle; +class Geometry; +class Hair; class Mesh; struct Transform; -/* Attributes for voxels are images */ - -struct VoxelAttribute { - ImageManager *manager; - int slot; -}; - /* Attribute * * Arbitrary data layers on meshes. @@ -56,17 +54,23 @@ class Attribute { AttributeElement element; uint flags; /* enum AttributeFlag */ - Attribute() - { - } + Attribute(ustring name, + TypeDesc type, + AttributeElement element, + Geometry *geom, + AttributePrimitive prim); + Attribute(Attribute &&other) = default; + Attribute(const Attribute &other) = delete; + Attribute &operator=(const Attribute &other) = delete; ~Attribute(); + void set(ustring name, TypeDesc type, AttributeElement element); - void resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only); + void resize(Geometry *geom, AttributePrimitive prim, bool reserve_only); void resize(size_t num_elements); size_t data_sizeof() const; - size_t element_size(Mesh *mesh, AttributePrimitive prim) const; - size_t buffer_size(Mesh *mesh, AttributePrimitive prim) const; + size_t element_size(Geometry *geom, AttributePrimitive prim) const; + size_t buffer_size(Geometry *geom, AttributePrimitive prim) const; char *data() { @@ -102,10 +106,12 @@ class Attribute { assert(data_sizeof() == sizeof(Transform)); return (Transform *)data(); } - VoxelAttribute *data_voxel() + + /* Attributes for voxels are images */ + ImageHandle &data_voxel() { - assert(data_sizeof() == sizeof(VoxelAttribute)); - return (VoxelAttribute *)data(); + 
assert(data_sizeof() == sizeof(ImageHandle)); + return *(ImageHandle *)data(); } const char *data() const @@ -137,10 +143,10 @@ class Attribute { assert(data_sizeof() == sizeof(Transform)); return (const Transform *)data(); } - const VoxelAttribute *data_voxel() const + const ImageHandle &data_voxel() const { - assert(data_sizeof() == sizeof(VoxelAttribute)); - return (const VoxelAttribute *)data(); + assert(data_sizeof() == sizeof(ImageHandle)); + return *(const ImageHandle *)data(); } void zero_data(void *dst); @@ -150,13 +156,14 @@ class Attribute { void add(const float2 &f); void add(const float3 &f); void add(const uchar4 &f); - void add(const Transform &f); - void add(const VoxelAttribute &f); + void add(const Transform &tfm); void add(const char *data); static bool same_storage(TypeDesc a, TypeDesc b); static const char *standard_name(AttributeStandard std); static AttributeStandard name_standard(const char *name); + + void get_uv_tiles(Geometry *geom, AttributePrimitive prim, unordered_set<int> &tiles) const; }; /* Attribute Set @@ -165,12 +172,11 @@ class Attribute { class AttributeSet { public: - Mesh *triangle_mesh; - Mesh *curve_mesh; - Mesh *subd_mesh; + Geometry *geometry; + AttributePrimitive prim; list<Attribute> attributes; - AttributeSet(); + AttributeSet(Geometry *geometry, AttributePrimitive prim); ~AttributeSet(); Attribute *add(ustring name, TypeDesc type, AttributeElement element); @@ -200,9 +206,9 @@ class AttributeRequest { ustring name; AttributeStandard std; - /* temporary variables used by MeshManager */ - TypeDesc triangle_type, curve_type, subd_type; - AttributeDescriptor triangle_desc, curve_desc, subd_desc; + /* temporary variables used by GeometryManager */ + TypeDesc type, subd_type; + AttributeDescriptor desc, subd_desc; explicit AttributeRequest(ustring name_); explicit AttributeRequest(AttributeStandard std); diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp index 6553ca735e4..694bb640995 
100644 --- a/intern/cycles/render/background.cpp +++ b/intern/cycles/render/background.cpp @@ -16,8 +16,8 @@ #include "render/background.h" #include "device/device.h" -#include "render/integrator.h" #include "render/graph.h" +#include "render/integrator.h" #include "render/nodes.h" #include "render/scene.h" #include "render/shader.h" @@ -43,6 +43,8 @@ NODE_DEFINE(Background) SOCKET_BOOLEAN(transparent_glass, "Transparent Glass", false); SOCKET_FLOAT(transparent_roughness_threshold, "Transparent Roughness Threshold", 0.0f); + SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f); + SOCKET_NODE(shader, "Shader", &Shader::node_type); return type; @@ -51,6 +53,7 @@ NODE_DEFINE(Background) Background::Background() : Node(node_type) { need_update = true; + shader = NULL; } Background::~Background() @@ -91,6 +94,8 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene else kbackground->volume_shader = SHADER_NONE; + kbackground->volume_step_size = volume_step_size * scene->integrator->volume_step_rate; + /* No background node, make world shader invisible to all rays, to skip evaluation in kernel. 
*/ if (bg_shader->graph->nodes.size() <= 1) { kbackground->surface_shader |= SHADER_EXCLUDE_ANY; diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h index fb27430f9a3..c2ca1f75179 100644 --- a/intern/cycles/render/background.h +++ b/intern/cycles/render/background.h @@ -45,6 +45,8 @@ class Background : public Node { bool transparent_glass; float transparent_roughness_threshold; + float volume_step_size; + bool need_update; Background(); diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index b906357b7b5..35f942b3e9b 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -15,10 +15,10 @@ */ #include "render/bake.h" +#include "render/integrator.h" #include "render/mesh.h" #include "render/object.h" #include "render/shader.h" -#include "render/integrator.h" #include "util/util_foreach.h" @@ -253,8 +253,8 @@ int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType ty /* Only antialias normal if mesh has bump mapping. 
*/ Object *object = scene->objects[bake_data->object()]; - if (object->mesh) { - foreach (Shader *shader, object->mesh->used_shaders) { + if (object->geometry) { + foreach (Shader *shader, object->geometry->used_shaders) { if (shader->has_bump) { return scene->integrator->aa_samples; } @@ -285,8 +285,6 @@ int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_ return BAKE_FILTER_GLOSSY | component_flags; case SHADER_EVAL_TRANSMISSION: return BAKE_FILTER_TRANSMISSION | component_flags; - case SHADER_EVAL_SUBSURFACE: - return BAKE_FILTER_SUBSURFACE | component_flags; case SHADER_EVAL_COMBINED: return pass_filter; default: diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 50308d0d377..2d89fb9ffba 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -16,8 +16,8 @@ #include <stdlib.h> -#include "render/buffers.h" #include "device/device.h" +#include "render/buffers.h" #include "util/util_foreach.h" #include "util/util_hash.h" @@ -43,6 +43,8 @@ BufferParams::BufferParams() denoising_data_pass = false; denoising_clean_pass = false; denoising_prefiltered_pass = false; + + Pass::add(PASS_COMBINED, passes); } void BufferParams::get_offset_stride(int &offset, int &stride) @@ -144,7 +146,7 @@ void RenderBuffers::reset(BufferParams ¶ms_) params = params_; /* re-allocate buffer */ - buffer.alloc(params.width * params.height * params.get_passes_size()); + buffer.alloc(params.width * params.get_passes_size(), params.height); buffer.zero_to_device(); } @@ -258,6 +260,22 @@ bool RenderBuffers::get_pass_rect( return false; } + float *sample_count = NULL; + if (name == "Combined") { + int sample_offset = 0; + for (size_t j = 0; j < params.passes.size(); j++) { + Pass &pass = params.passes[j]; + if (pass.type != PASS_SAMPLE_COUNT) { + sample_offset += pass.components; + continue; + } + else { + sample_count = buffer.data() + sample_offset; + break; + } + } + } + int pass_offset = 0; for 
(size_t j = 0; j < params.passes.size(); j++) { @@ -418,6 +436,11 @@ bool RenderBuffers::get_pass_rect( } else { for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { + if (sample_count && sample_count[i * pass_stride] < 0.0f) { + scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f; + scale_exposure = (pass.exposure) ? scale * exposure : scale; + } + float4 f = make_float4(in[0], in[1], in[2], in[3]); pixels[0] = f.x * scale_exposure; diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 1042b42810f..42efb031843 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -130,7 +130,7 @@ class DisplayBuffer { class RenderTile { public: - typedef enum { PATH_TRACE, DENOISE } Task; + typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task; Task task; int x, y, w, h; diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 38306a63c74..74953afae9d 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -29,6 +29,7 @@ #include "util/util_vector.h" /* needed for calculating differentials */ +// clang-format off #include "kernel/kernel_compat_cpu.h" #include "kernel/split/kernel_split_data.h" #include "kernel/kernel_globals.h" @@ -36,6 +37,7 @@ #include "kernel/kernel_differential.h" #include "kernel/kernel_montecarlo.h" #include "kernel/kernel_camera.h" +// clang-format on CCL_NAMESPACE_BEGIN @@ -498,7 +500,7 @@ void Camera::device_update_volume(Device * /*device*/, DeviceScene *dscene, Scen BoundBox viewplane_boundbox = viewplane_bounds_get(); for (size_t i = 0; i < scene->objects.size(); ++i) { Object *object = scene->objects[i]; - if (object->mesh->has_volume && viewplane_boundbox.intersects(object->bounds)) { + if (object->geometry->has_volume && viewplane_boundbox.intersects(object->bounds)) { /* TODO(sergey): Consider adding more grained check. 
*/ VLOG(1) << "Detected camera inside volume."; kcam->is_inside_volume = 1; diff --git a/intern/cycles/render/colorspace.cpp b/intern/cycles/render/colorspace.cpp index 2e5b53057c0..7605fcaf5ff 100644 --- a/intern/cycles/render/colorspace.cpp +++ b/intern/cycles/render/colorspace.cpp @@ -17,8 +17,8 @@ #include "render/colorspace.h" #include "util/util_color.h" -#include "util/util_image.h" #include "util/util_half.h" +#include "util/util_image.h" #include "util/util_logging.h" #include "util/util_math.h" #include "util/util_thread.h" @@ -262,56 +262,49 @@ template<typename T> inline void cast_from_float4(T *data, float4 value) /* Slower versions for other all data types, which needs to convert to float and back. */ template<typename T, bool compress_as_srgb = false> -inline void processor_apply_pixels(const OCIO::Processor *processor, - T *pixels, - size_t width, - size_t height) +inline void processor_apply_pixels(const OCIO::Processor *processor, T *pixels, size_t num_pixels) { /* TODO: implement faster version for when we know the conversion * is a simple matrix transform between linear spaces. In that case * unpremultiply is not needed. */ /* Process large images in chunks to keep temporary memory requirement down. 
*/ - size_t y_chunk_size = max(1, 16 * 1024 * 1024 / (sizeof(float4) * width)); - vector<float4> float_pixels(y_chunk_size * width); - - for (size_t y0 = 0; y0 < height; y0 += y_chunk_size) { - size_t y1 = std::min(y0 + y_chunk_size, height); - size_t i = 0; + const size_t chunk_size = std::min((size_t)(16 * 1024 * 1024), num_pixels); + vector<float4> float_pixels(chunk_size); - for (size_t y = y0; y < y1; y++) { - for (size_t x = 0; x < width; x++, i++) { - float4 value = cast_to_float4(pixels + 4 * (y * width + x)); + for (size_t j = 0; j < num_pixels; j += chunk_size) { + size_t width = std::min(chunk_size, num_pixels - j); - if (!(value.w == 0.0f || value.w == 1.0f)) { - float inv_alpha = 1.0f / value.w; - value.x *= inv_alpha; - value.y *= inv_alpha; - value.z *= inv_alpha; - } + for (size_t i = 0; i < width; i++) { + float4 value = cast_to_float4(pixels + 4 * (j + i)); - float_pixels[i] = value; + if (!(value.w <= 0.0f || value.w == 1.0f)) { + float inv_alpha = 1.0f / value.w; + value.x *= inv_alpha; + value.y *= inv_alpha; + value.z *= inv_alpha; } + + float_pixels[i] = value; } - OCIO::PackedImageDesc desc((float *)float_pixels.data(), width, y_chunk_size, 4); + OCIO::PackedImageDesc desc((float *)float_pixels.data(), width, 1, 4); processor->apply(desc); - i = 0; - for (size_t y = y0; y < y1; y++) { - for (size_t x = 0; x < width; x++, i++) { - float4 value = float_pixels[i]; + for (size_t i = 0; i < width; i++) { + float4 value = float_pixels[i]; + + if (compress_as_srgb) { + value = color_linear_to_srgb_v4(value); + } + if (!(value.w <= 0.0f || value.w == 1.0f)) { value.x *= value.w; value.y *= value.w; value.z *= value.w; - - if (compress_as_srgb) { - value = color_linear_to_srgb_v4(value); - } - - cast_from_float4(pixels + 4 * (y * width + x), value); } + + cast_from_float4(pixels + 4 * (j + i), value); } } } @@ -320,9 +313,7 @@ inline void processor_apply_pixels(const OCIO::Processor *processor, template<typename T> void 
ColorSpaceManager::to_scene_linear(ustring colorspace, T *pixels, - size_t width, - size_t height, - size_t depth, + size_t num_pixels, bool compress_as_srgb) { #ifdef WITH_OCIO @@ -331,23 +322,17 @@ void ColorSpaceManager::to_scene_linear(ustring colorspace, if (processor) { if (compress_as_srgb) { /* Compress output as sRGB. */ - for (size_t z = 0; z < depth; z++) { - processor_apply_pixels<T, true>(processor, &pixels[z * width * height], width, height); - } + processor_apply_pixels<T, true>(processor, pixels, num_pixels); } else { /* Write output as scene linear directly. */ - for (size_t z = 0; z < depth; z++) { - processor_apply_pixels<T>(processor, &pixels[z * width * height], width, height); - } + processor_apply_pixels<T>(processor, pixels, num_pixels); } } #else (void)colorspace; (void)pixels; - (void)width; - (void)height; - (void)depth; + (void)num_pixels; (void)compress_as_srgb; #endif } @@ -402,9 +387,9 @@ void ColorSpaceManager::free_memory() } /* Template instanstations so we don't have to inline functions. 
*/ -template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, size_t, size_t, bool); -template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, size_t, size_t, bool); -template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, size_t, size_t, bool); -template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, size_t, size_t, bool); +template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, bool); +template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, bool); +template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, bool); +template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, bool); CCL_NAMESPACE_END diff --git a/intern/cycles/render/colorspace.h b/intern/cycles/render/colorspace.h index 9fea2d6efc6..51d0b121cc0 100644 --- a/intern/cycles/render/colorspace.h +++ b/intern/cycles/render/colorspace.h @@ -45,9 +45,7 @@ class ColorSpaceManager { template<typename T> static void to_scene_linear(ustring colorspace, T *pixels, - size_t width, - size_t height, - size_t depth, + size_t num_pixels, bool compress_as_srgb); /* Efficiently convert pixels to scene linear colorspace at render time, diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h index 7f622488a88..fec4123c361 100644 --- a/intern/cycles/render/constant_fold.h +++ b/intern/cycles/render/constant_fold.h @@ -17,8 +17,8 @@ #ifndef __CONSTANT_FOLD_H__ #define __CONSTANT_FOLD_H__ -#include "util/util_types.h" #include "kernel/svm/svm_types.h" +#include "util/util_types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp index 0a29903728a..99d4daa6961 100644 --- a/intern/cycles/render/coverage.cpp +++ b/intern/cycles/render/coverage.cpp @@ -15,13 +15,16 @@ */ #include "render/coverage.h" +#include "render/buffers.h" + #include "kernel/kernel_compat_cpu.h" +#include "kernel/kernel_types.h" 
#include "kernel/split/kernel_split_data.h" + #include "kernel/kernel_globals.h" #include "kernel/kernel_id_passes.h" -#include "kernel/kernel_types.h" + #include "util/util_map.h" -#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h index 3d1f6a2b040..12182c614da 100644 --- a/intern/cycles/render/coverage.h +++ b/intern/cycles/render/coverage.h @@ -14,18 +14,19 @@ * limitations under the License. */ -#include "render/buffers.h" -#include "kernel/kernel_compat_cpu.h" -#include "kernel/split/kernel_split_data.h" -#include "kernel/kernel_globals.h" +#ifndef __COVERAGE_H__ +#define __COVERAGE_H__ + #include "util/util_map.h" #include "util/util_vector.h" -#ifndef __COVERAGE_H__ -# define __COVERAGE_H__ - CCL_NAMESPACE_BEGIN +struct KernelGlobals; +class RenderTile; + +typedef unordered_map<float, float> CoverageMap; + class Coverage { public: Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_) diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp index 66fbc9eb4a8..1907bb33d06 100644 --- a/intern/cycles/render/curves.cpp +++ b/intern/cycles/render/curves.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/device.h" #include "render/curves.h" +#include "device/device.h" #include "render/mesh.h" #include "render/object.h" #include "render/scene.h" diff --git a/intern/cycles/render/denoising.h b/intern/cycles/render/denoising.h index c234d00eb49..5c6f913cb38 100644 --- a/intern/cycles/render/denoising.h +++ b/intern/cycles/render/denoising.h @@ -23,8 +23,8 @@ #include "render/buffers.h" #include "util/util_string.h" -#include "util/util_vector.h" #include "util/util_unique_ptr.h" +#include "util/util_vector.h" #include <OpenImageIO/imageio.h> diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index bd274844b52..baf02901123 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "render/camera.h" -#include "device/device.h" #include "render/film.h" +#include "device/device.h" +#include "render/camera.h" #include "render/integrator.h" #include "render/mesh.h" #include "render/scene.h" @@ -155,7 +155,6 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name) case PASS_DIFFUSE_COLOR: case PASS_GLOSSY_COLOR: case PASS_TRANSMISSION_COLOR: - case PASS_SUBSURFACE_COLOR: pass.components = 4; break; case PASS_DIFFUSE_DIRECT: @@ -176,12 +175,6 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name) pass.exposure = true; pass.divide_type = PASS_TRANSMISSION_COLOR; break; - case PASS_SUBSURFACE_DIRECT: - case PASS_SUBSURFACE_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_SUBSURFACE_COLOR; - break; case PASS_VOLUME_DIRECT: case PASS_VOLUME_INDIRECT: pass.components = 4; @@ -190,6 +183,13 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name) case PASS_CRYPTOMATTE: pass.components = 4; break; + case PASS_ADAPTIVE_AUX_BUFFER: + pass.components = 4; + break; + case PASS_SAMPLE_COUNT: + pass.components = 1; + pass.exposure = false; + break; case PASS_AOV_COLOR: 
pass.components = 4; break; @@ -203,9 +203,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name) passes.push_back(pass); - /* order from by components, to ensure alignment so passes with size 4 - * come first and then passes with size 1 */ - sort(&passes[0], &passes[0] + passes.size(), compare_pass_order); + /* Order from by components, to ensure alignment so passes with size 4 + * come first and then passes with size 1. Note this must use stable sort + * so cryptomatte passes remain in the right order. */ + stable_sort(&passes[0], &passes[0] + passes.size(), compare_pass_order); if (pass.divide_type != PASS_NONE) Pass::add(pass.divide_type, passes); @@ -318,15 +319,19 @@ NODE_DEFINE(Film) SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false); SOCKET_INT(denoising_flags, "Denoising Flags", 0); + SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false); return type; } Film::Film() : Node(node_type) { + Pass::add(PASS_COMBINED, passes); + use_light_visibility = false; filter_table_offset = TABLE_OFFSET_INVALID; cryptomatte_passes = CRYPT_NONE; + display_pass = PASS_COMBINED; need_update = true; } @@ -439,9 +444,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_TRANSMISSION_COLOR: kfilm->pass_transmission_color = kfilm->pass_stride; break; - case PASS_SUBSURFACE_COLOR: - kfilm->pass_subsurface_color = kfilm->pass_stride; - break; case PASS_DIFFUSE_INDIRECT: kfilm->pass_diffuse_indirect = kfilm->pass_stride; break; @@ -451,9 +453,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_TRANSMISSION_INDIRECT: kfilm->pass_transmission_indirect = kfilm->pass_stride; break; - case PASS_SUBSURFACE_INDIRECT: - kfilm->pass_subsurface_indirect = kfilm->pass_stride; - break; case PASS_VOLUME_INDIRECT: kfilm->pass_volume_indirect = kfilm->pass_stride; break; 
@@ -466,9 +465,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_TRANSMISSION_DIRECT: kfilm->pass_transmission_direct = kfilm->pass_stride; break; - case PASS_SUBSURFACE_DIRECT: - kfilm->pass_subsurface_direct = kfilm->pass_stride; - break; case PASS_VOLUME_DIRECT: kfilm->pass_volume_direct = kfilm->pass_stride; break; @@ -495,6 +491,12 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride; have_cryptomatte = true; break; + case PASS_ADAPTIVE_AUX_BUFFER: + kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride; + break; + case PASS_SAMPLE_COUNT: + kfilm->pass_sample_count = kfilm->pass_stride; + break; case PASS_AOV_COLOR: if (!have_aov_color) { kfilm->pass_aov_color = kfilm->pass_stride; @@ -518,7 +520,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f); } else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR || - pass.type == PASS_GLOSSY_COLOR || pass.type == PASS_SUBSURFACE_COLOR) { + pass.type == PASS_GLOSSY_COLOR) { kfilm->display_divide_pass_stride = kfilm->pass_stride; } @@ -590,13 +592,13 @@ bool Film::modified(const Film &film) void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes) { if (Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) { - scene->mesh_manager->tag_update(scene); + scene->geometry_manager->tag_update(scene); foreach (Shader *shader, scene->shaders) - shader->need_update_mesh = true; + shader->need_update_geometry = true; } else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) { - scene->mesh_manager->tag_update(scene); + scene->geometry_manager->tag_update(scene); } else if (Pass::contains(passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) { scene->integrator->tag_update(scene); diff --git a/intern/cycles/render/film.h 
b/intern/cycles/render/film.h index 95e54cb54d8..aae8fb404b0 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -81,6 +81,8 @@ class Film : public Node { CryptomatteType cryptomatte_passes; int cryptomatte_depth; + bool use_adaptive_sampling; + bool need_update; Film(); diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp new file mode 100644 index 00000000000..d46ed430c4f --- /dev/null +++ b/intern/cycles/render/geometry.cpp @@ -0,0 +1,1470 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "bvh/bvh.h" +#include "bvh/bvh_build.h" + +#ifdef WITH_EMBREE +# include "bvh/bvh_embree.h" +#endif + +#include "render/attribute.h" +#include "render/camera.h" +#include "render/geometry.h" +#include "render/hair.h" +#include "render/light.h" +#include "render/mesh.h" +#include "render/nodes.h" +#include "render/object.h" +#include "render/scene.h" +#include "render/shader.h" +#include "render/stats.h" + +#include "subd/subd_patch_table.h" +#include "subd/subd_split.h" + +#include "kernel/osl/osl_globals.h" + +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_progress.h" + +CCL_NAMESPACE_BEGIN + +/* Geometry */ + +NODE_ABSTRACT_DEFINE(Geometry) +{ + NodeType *type = NodeType::add("geometry_base", NULL); + + SOCKET_UINT(motion_steps, "Motion Steps", 3); + SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false); + + return type; +} + +Geometry::Geometry(const NodeType *node_type, const Type type) + : Node(node_type), type(type), attributes(this, ATTR_PRIM_GEOMETRY) +{ + need_update = true; + need_update_rebuild = false; + + transform_applied = false; + transform_negative_scaled = false; + transform_normal = transform_identity(); + bounds = BoundBox::empty; + + has_volume = false; + has_surface_bssrdf = false; + + bvh = NULL; + attr_map_offset = 0; + optix_prim_offset = 0; + prim_offset = 0; +} + +Geometry::~Geometry() +{ + delete bvh; +} + +void Geometry::clear() +{ + used_shaders.clear(); + transform_applied = false; + transform_negative_scaled = false; + transform_normal = transform_identity(); +} + +bool Geometry::need_attribute(Scene *scene, AttributeStandard std) +{ + if (std == ATTR_STD_NONE) + return false; + + if (scene->need_global_attribute(std)) + return true; + + foreach (Shader *shader, used_shaders) + if (shader->attributes.find(std)) + return true; + + return false; +} + +bool Geometry::need_attribute(Scene * /*scene*/, ustring name) +{ + if (name == ustring()) + return false; + + foreach (Shader 
*shader, used_shaders) + if (shader->attributes.find(name)) + return true; + + return false; +} + +float Geometry::motion_time(int step) const +{ + return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f; +} + +int Geometry::motion_step(float time) const +{ + if (motion_steps > 1) { + int attr_step = 0; + + for (int step = 0; step < motion_steps; step++) { + float step_time = motion_time(step); + if (step_time == time) { + return attr_step; + } + + /* Center step is stored in a separate attribute. */ + if (step != motion_steps / 2) { + attr_step++; + } + } + } + + return -1; +} + +bool Geometry::need_build_bvh(BVHLayout layout) const +{ + return !transform_applied || has_surface_bssrdf || layout == BVH_LAYOUT_OPTIX; +} + +bool Geometry::is_instanced() const +{ + /* Currently we treat subsurface objects as instanced. + * + * While it might be not very optimal for ray traversal, it avoids having + * duplicated BVH in the memory, saving quite some space. + */ + return !transform_applied || has_surface_bssrdf; +} + +bool Geometry::has_true_displacement() const +{ + foreach (Shader *shader, used_shaders) { + if (shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) { + return true; + } + } + + return false; +} + +void Geometry::compute_bvh( + Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total) +{ + if (progress->get_cancel()) + return; + + compute_bounds(); + + const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout, + device->get_bvh_layout_mask()); + if (need_build_bvh(bvh_layout)) { + string msg = "Updating Geometry BVH "; + if (name.empty()) + msg += string_printf("%u/%u", (uint)(n + 1), (uint)total); + else + msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total); + + Object object; + object.geometry = this; + + vector<Geometry *> geometry; + geometry.push_back(this); + vector<Object *> objects; + objects.push_back(&object); + + if (bvh && 
!need_update_rebuild) { + progress->set_status(msg, "Refitting BVH"); + + bvh->geometry = geometry; + bvh->objects = objects; + + bvh->refit(*progress); + } + else { + progress->set_status(msg, "Building BVH"); + + BVHParams bparams; + bparams.use_spatial_split = params->use_bvh_spatial_split; + bparams.bvh_layout = bvh_layout; + bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && + params->use_bvh_unaligned_nodes; + bparams.num_motion_triangle_steps = params->num_bvh_time_steps; + bparams.num_motion_curve_steps = params->num_bvh_time_steps; + bparams.bvh_type = params->bvh_type; + bparams.curve_flags = dscene->data.curve.curveflags; + bparams.curve_subdivisions = dscene->data.curve.subdivisions; + + delete bvh; + bvh = BVH::create(bparams, geometry, objects); + MEM_GUARDED_CALL(progress, bvh->build, *progress); + } + } + + need_update = false; + need_update_rebuild = false; +} + +bool Geometry::has_motion_blur() const +{ + return (use_motion_blur && attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)); +} + +bool Geometry::has_voxel_attributes() const +{ + foreach (const Attribute &attr, attributes.attributes) { + if (attr.element == ATTR_ELEMENT_VOXEL) { + return true; + } + } + + return false; +} + +void Geometry::tag_update(Scene *scene, bool rebuild) +{ + need_update = true; + + if (rebuild) { + need_update_rebuild = true; + scene->light_manager->need_update = true; + } + else { + foreach (Shader *shader, used_shaders) + if (shader->has_surface_emission) + scene->light_manager->need_update = true; + } + + scene->geometry_manager->need_update = true; + scene->object_manager->need_update = true; +} + +/* Geometry Manager */ + +GeometryManager::GeometryManager() +{ + need_update = true; + need_flags_update = true; +} + +GeometryManager::~GeometryManager() +{ +} + +void GeometryManager::update_osl_attributes(Device *device, + Scene *scene, + vector<AttributeRequestSet> &geom_attributes) +{ +#ifdef WITH_OSL + /* for OSL, a hash map is used to lookup the 
attribute by name. */ + OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + + og->object_name_map.clear(); + og->attribute_map.clear(); + og->object_names.clear(); + + og->attribute_map.resize(scene->objects.size() * ATTR_PRIM_TYPES); + + for (size_t i = 0; i < scene->objects.size(); i++) { + /* set object name to object index map */ + Object *object = scene->objects[i]; + og->object_name_map[object->name] = i; + og->object_names.push_back(object->name); + + /* set object attributes */ + foreach (ParamValue &attr, object->attributes) { + OSLGlobals::Attribute osl_attr; + + osl_attr.type = attr.type(); + osl_attr.desc.element = ATTR_ELEMENT_OBJECT; + osl_attr.value = attr; + osl_attr.desc.offset = 0; + osl_attr.desc.flags = 0; + + og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][attr.name()] = osl_attr; + og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][attr.name()] = osl_attr; + } + + /* find geometry attributes */ + size_t j; + + for (j = 0; j < scene->geometry.size(); j++) + if (scene->geometry[j] == object->geometry) + break; + + AttributeRequestSet &attributes = geom_attributes[j]; + + /* set object attributes */ + foreach (AttributeRequest &req, attributes.requests) { + OSLGlobals::Attribute osl_attr; + + if (req.desc.element != ATTR_ELEMENT_NONE) { + osl_attr.desc = req.desc; + + if (req.type == TypeDesc::TypeFloat) + osl_attr.type = TypeDesc::TypeFloat; + else if (req.type == TypeDesc::TypeMatrix) + osl_attr.type = TypeDesc::TypeMatrix; + else if (req.type == TypeFloat2) + osl_attr.type = TypeFloat2; + else if (req.type == TypeRGBA) + osl_attr.type = TypeRGBA; + else + osl_attr.type = TypeDesc::TypeColor; + + if (req.std != ATTR_STD_NONE) { + /* if standard attribute, add lookup by geom: name convention */ + ustring stdname(string("geom:") + string(Attribute::standard_name(req.std))); + og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][stdname] = osl_attr; + } + else if (req.name != ustring()) { + /* add lookup by geometry 
attribute name */ + og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_GEOMETRY][req.name] = osl_attr; + } + } + + if (req.subd_desc.element != ATTR_ELEMENT_NONE) { + osl_attr.desc = req.subd_desc; + + if (req.subd_type == TypeDesc::TypeFloat) + osl_attr.type = TypeDesc::TypeFloat; + else if (req.subd_type == TypeDesc::TypeMatrix) + osl_attr.type = TypeDesc::TypeMatrix; + else if (req.subd_type == TypeFloat2) + osl_attr.type = TypeFloat2; + else if (req.subd_type == TypeRGBA) + osl_attr.type = TypeRGBA; + else + osl_attr.type = TypeDesc::TypeColor; + + if (req.std != ATTR_STD_NONE) { + /* if standard attribute, add lookup by geom: name convention */ + ustring stdname(string("geom:") + string(Attribute::standard_name(req.std))); + og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][stdname] = osl_attr; + } + else if (req.name != ustring()) { + /* add lookup by geometry attribute name */ + og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][req.name] = osl_attr; + } + } + } + } +#else + (void)device; + (void)scene; + (void)geom_attributes; +#endif +} + +void GeometryManager::update_svm_attributes(Device *, + DeviceScene *dscene, + Scene *scene, + vector<AttributeRequestSet> &geom_attributes) +{ + /* for SVM, the attributes_map table is used to lookup the offset of an + * attribute, based on a unique shader attribute id. 
*/ + + /* compute array stride */ + int attr_map_size = 0; + + for (size_t i = 0; i < scene->geometry.size(); i++) { + Geometry *geom = scene->geometry[i]; + geom->attr_map_offset = attr_map_size; + attr_map_size += (geom_attributes[i].size() + 1) * ATTR_PRIM_TYPES; + } + + if (attr_map_size == 0) + return; + + /* create attribute map */ + uint4 *attr_map = dscene->attributes_map.alloc(attr_map_size); + memset(attr_map, 0, dscene->attributes_map.size() * sizeof(uint)); + + for (size_t i = 0; i < scene->geometry.size(); i++) { + Geometry *geom = scene->geometry[i]; + AttributeRequestSet &attributes = geom_attributes[i]; + + /* set object attributes */ + int index = geom->attr_map_offset; + + foreach (AttributeRequest &req, attributes.requests) { + uint id; + + if (req.std == ATTR_STD_NONE) + id = scene->shader_manager->get_attribute_id(req.name); + else + id = scene->shader_manager->get_attribute_id(req.std); + + attr_map[index].x = id; + attr_map[index].y = req.desc.element; + attr_map[index].z = as_uint(req.desc.offset); + + if (req.type == TypeDesc::TypeFloat) + attr_map[index].w = NODE_ATTR_FLOAT; + else if (req.type == TypeDesc::TypeMatrix) + attr_map[index].w = NODE_ATTR_MATRIX; + else if (req.type == TypeFloat2) + attr_map[index].w = NODE_ATTR_FLOAT2; + else if (req.type == TypeRGBA) + attr_map[index].w = NODE_ATTR_RGBA; + else + attr_map[index].w = NODE_ATTR_FLOAT3; + + attr_map[index].w |= req.desc.flags << 8; + + index++; + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->subd_faces.size()) { + attr_map[index].x = id; + attr_map[index].y = req.subd_desc.element; + attr_map[index].z = as_uint(req.subd_desc.offset); + + if (req.subd_type == TypeDesc::TypeFloat) + attr_map[index].w = NODE_ATTR_FLOAT; + else if (req.subd_type == TypeDesc::TypeMatrix) + attr_map[index].w = NODE_ATTR_MATRIX; + else if (req.subd_type == TypeFloat2) + attr_map[index].w = NODE_ATTR_FLOAT2; + else if (req.subd_type == TypeRGBA) + 
attr_map[index].w = NODE_ATTR_RGBA; + else + attr_map[index].w = NODE_ATTR_FLOAT3; + + attr_map[index].w |= req.subd_desc.flags << 8; + } + } + + index++; + } + + /* terminator */ + for (int j = 0; j < ATTR_PRIM_TYPES; j++) { + attr_map[index].x = ATTR_STD_NONE; + attr_map[index].y = 0; + attr_map[index].z = 0; + attr_map[index].w = 0; + + index++; + } + } + + /* copy to device */ + dscene->attributes_map.copy_to_device(); +} + +static void update_attribute_element_size(Geometry *geom, + Attribute *mattr, + AttributePrimitive prim, + size_t *attr_float_size, + size_t *attr_float2_size, + size_t *attr_float3_size, + size_t *attr_uchar4_size) +{ + if (mattr) { + size_t size = mattr->element_size(geom, prim); + + if (mattr->element == ATTR_ELEMENT_VOXEL) { + /* pass */ + } + else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) { + *attr_uchar4_size += size; + } + else if (mattr->type == TypeDesc::TypeFloat) { + *attr_float_size += size; + } + else if (mattr->type == TypeFloat2) { + *attr_float2_size += size; + } + else if (mattr->type == TypeDesc::TypeMatrix) { + *attr_float3_size += size * 4; + } + else { + *attr_float3_size += size; + } + } +} + +static void update_attribute_element_offset(Geometry *geom, + device_vector<float> &attr_float, + size_t &attr_float_offset, + device_vector<float2> &attr_float2, + size_t &attr_float2_offset, + device_vector<float4> &attr_float3, + size_t &attr_float3_offset, + device_vector<uchar4> &attr_uchar4, + size_t &attr_uchar4_offset, + Attribute *mattr, + AttributePrimitive prim, + TypeDesc &type, + AttributeDescriptor &desc) +{ + if (mattr) { + /* store element and type */ + desc.element = mattr->element; + desc.flags = mattr->flags; + type = mattr->type; + + /* store attribute data in arrays */ + size_t size = mattr->element_size(geom, prim); + + AttributeElement &element = desc.element; + int &offset = desc.offset; + + if (mattr->element == ATTR_ELEMENT_VOXEL) { + /* store slot in offset value */ + ImageHandle &handle = 
mattr->data_voxel(); + offset = handle.svm_slot(); + } + else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) { + uchar4 *data = mattr->data_uchar4(); + offset = attr_uchar4_offset; + + assert(attr_uchar4.size() >= offset + size); + for (size_t k = 0; k < size; k++) { + attr_uchar4[offset + k] = data[k]; + } + attr_uchar4_offset += size; + } + else if (mattr->type == TypeDesc::TypeFloat) { + float *data = mattr->data_float(); + offset = attr_float_offset; + + assert(attr_float.size() >= offset + size); + for (size_t k = 0; k < size; k++) { + attr_float[offset + k] = data[k]; + } + attr_float_offset += size; + } + else if (mattr->type == TypeFloat2) { + float2 *data = mattr->data_float2(); + offset = attr_float2_offset; + + assert(attr_float2.size() >= offset + size); + for (size_t k = 0; k < size; k++) { + attr_float2[offset + k] = data[k]; + } + attr_float2_offset += size; + } + else if (mattr->type == TypeDesc::TypeMatrix) { + Transform *tfm = mattr->data_transform(); + offset = attr_float3_offset; + + assert(attr_float3.size() >= offset + size * 3); + for (size_t k = 0; k < size * 3; k++) { + attr_float3[offset + k] = (&tfm->x)[k]; + } + attr_float3_offset += size * 3; + } + else { + float4 *data = mattr->data_float4(); + offset = attr_float3_offset; + + assert(attr_float3.size() >= offset + size); + for (size_t k = 0; k < size; k++) { + attr_float3[offset + k] = data[k]; + } + attr_float3_offset += size; + } + + /* mesh vertex/curve index is global, not per object, so we sneak + * a correction for that in here */ + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK && + desc.flags & ATTR_SUBDIVIDED) { + /* indices for subdivided attributes are retrieved + * from patch table so no need for correction here*/ + } + else if (element == ATTR_ELEMENT_VERTEX) + offset -= mesh->vert_offset; + else if (element == ATTR_ELEMENT_VERTEX_MOTION) + offset -= mesh->vert_offset; + else if 
(element == ATTR_ELEMENT_FACE) { + if (prim == ATTR_PRIM_GEOMETRY) + offset -= mesh->prim_offset; + else + offset -= mesh->face_offset; + } + else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) { + if (prim == ATTR_PRIM_GEOMETRY) + offset -= 3 * mesh->prim_offset; + else + offset -= mesh->corner_offset; + } + } + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + if (element == ATTR_ELEMENT_CURVE) + offset -= hair->prim_offset; + else if (element == ATTR_ELEMENT_CURVE_KEY) + offset -= hair->curvekey_offset; + else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION) + offset -= hair->curvekey_offset; + } + } + else { + /* attribute not found */ + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + } +} + +void GeometryManager::device_update_attributes(Device *device, + DeviceScene *dscene, + Scene *scene, + Progress &progress) +{ + progress.set_status("Updating Mesh", "Computing attributes"); + + /* gather per mesh requested attributes. as meshes may have multiple + * shaders assigned, this merges the requested attributes that have + * been set per shader by the shader manager */ + vector<AttributeRequestSet> geom_attributes(scene->geometry.size()); + + for (size_t i = 0; i < scene->geometry.size(); i++) { + Geometry *geom = scene->geometry[i]; + + scene->need_global_attributes(geom_attributes[i]); + + foreach (Shader *shader, geom->used_shaders) { + geom_attributes[i].add(shader->attributes); + } + } + + /* mesh attribute are stored in a single array per data type. here we fill + * those arrays, and set the offset and element type to create attribute + * maps next */ + + /* Pre-allocate attributes to avoid arrays re-allocation which would + * take 2x of overall attribute memory usage. 
+ */ + size_t attr_float_size = 0; + size_t attr_float2_size = 0; + size_t attr_float3_size = 0; + size_t attr_uchar4_size = 0; + for (size_t i = 0; i < scene->geometry.size(); i++) { + Geometry *geom = scene->geometry[i]; + AttributeRequestSet &attributes = geom_attributes[i]; + foreach (AttributeRequest &req, attributes.requests) { + Attribute *attr = geom->attributes.find(req); + + update_attribute_element_size(geom, + attr, + ATTR_PRIM_GEOMETRY, + &attr_float_size, + &attr_float2_size, + &attr_float3_size, + &attr_uchar4_size); + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + Attribute *subd_attr = mesh->subd_attributes.find(req); + + update_attribute_element_size(mesh, + subd_attr, + ATTR_PRIM_SUBD, + &attr_float_size, + &attr_float2_size, + &attr_float3_size, + &attr_uchar4_size); + } + } + } + + dscene->attributes_float.alloc(attr_float_size); + dscene->attributes_float2.alloc(attr_float2_size); + dscene->attributes_float3.alloc(attr_float3_size); + dscene->attributes_uchar4.alloc(attr_uchar4_size); + + size_t attr_float_offset = 0; + size_t attr_float2_offset = 0; + size_t attr_float3_offset = 0; + size_t attr_uchar4_offset = 0; + + /* Fill in attributes. 
*/ + for (size_t i = 0; i < scene->geometry.size(); i++) { + Geometry *geom = scene->geometry[i]; + AttributeRequestSet &attributes = geom_attributes[i]; + + /* todo: we now store std and name attributes from requests even if + * they actually refer to the same mesh attributes, optimize */ + foreach (AttributeRequest &req, attributes.requests) { + Attribute *attr = geom->attributes.find(req); + update_attribute_element_offset(geom, + dscene->attributes_float, + attr_float_offset, + dscene->attributes_float2, + attr_float2_offset, + dscene->attributes_float3, + attr_float3_offset, + dscene->attributes_uchar4, + attr_uchar4_offset, + attr, + ATTR_PRIM_GEOMETRY, + req.type, + req.desc); + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + Attribute *subd_attr = mesh->subd_attributes.find(req); + + update_attribute_element_offset(mesh, + dscene->attributes_float, + attr_float_offset, + dscene->attributes_float2, + attr_float2_offset, + dscene->attributes_float3, + attr_float3_offset, + dscene->attributes_uchar4, + attr_uchar4_offset, + subd_attr, + ATTR_PRIM_SUBD, + req.subd_type, + req.subd_desc); + } + + if (progress.get_cancel()) + return; + } + } + + /* create attribute lookup maps */ + if (scene->shader_manager->use_osl()) + update_osl_attributes(device, scene, geom_attributes); + + update_svm_attributes(device, dscene, scene, geom_attributes); + + if (progress.get_cancel()) + return; + + /* copy to device */ + progress.set_status("Updating Mesh", "Copying Attributes to device"); + + if (dscene->attributes_float.size()) { + dscene->attributes_float.copy_to_device(); + } + if (dscene->attributes_float2.size()) { + dscene->attributes_float2.copy_to_device(); + } + if (dscene->attributes_float3.size()) { + dscene->attributes_float3.copy_to_device(); + } + if (dscene->attributes_uchar4.size()) { + dscene->attributes_uchar4.copy_to_device(); + } + + if (progress.get_cancel()) + return; + + /* After mesh attributes and patch tables have 
been copied to device memory, + * we need to update offsets in the objects. */ + scene->object_manager->device_update_mesh_offsets(device, dscene, scene); +} + +void GeometryManager::mesh_calc_offset(Scene *scene) +{ + size_t vert_size = 0; + size_t tri_size = 0; + + size_t curve_key_size = 0; + size_t curve_size = 0; + + size_t patch_size = 0; + size_t face_size = 0; + size_t corner_size = 0; + + size_t optix_prim_size = 0; + + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + + mesh->vert_offset = vert_size; + mesh->prim_offset = tri_size; + + mesh->patch_offset = patch_size; + mesh->face_offset = face_size; + mesh->corner_offset = corner_size; + + vert_size += mesh->verts.size(); + tri_size += mesh->num_triangles(); + + if (mesh->subd_faces.size()) { + Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1]; + patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8; + + /* patch tables are stored in same array so include them in patch_size */ + if (mesh->patch_table) { + mesh->patch_table_offset = patch_size; + patch_size += mesh->patch_table->total_size(); + } + } + + face_size += mesh->subd_faces.size(); + corner_size += mesh->subd_face_corners.size(); + + mesh->optix_prim_offset = optix_prim_size; + optix_prim_size += mesh->num_triangles(); + } + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + + hair->curvekey_offset = curve_key_size; + hair->prim_offset = curve_size; + + curve_key_size += hair->curve_keys.size(); + curve_size += hair->num_curves(); + + hair->optix_prim_offset = optix_prim_size; + optix_prim_size += hair->num_segments(); + } + } +} + +void GeometryManager::device_update_mesh( + Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress) +{ + /* Count. 
*/ + size_t vert_size = 0; + size_t tri_size = 0; + + size_t curve_key_size = 0; + size_t curve_size = 0; + + size_t patch_size = 0; + + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + + vert_size += mesh->verts.size(); + tri_size += mesh->num_triangles(); + + if (mesh->subd_faces.size()) { + Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1]; + patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8; + + /* patch tables are stored in same array so include them in patch_size */ + if (mesh->patch_table) { + mesh->patch_table_offset = patch_size; + patch_size += mesh->patch_table->total_size(); + } + } + } + else if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + + curve_key_size += hair->curve_keys.size(); + curve_size += hair->num_curves(); + } + } + + /* Create mapping from triangle to primitive triangle array. */ + vector<uint> tri_prim_index(tri_size); + if (for_displacement) { + /* For displacement kernels we do some trickery to make them believe + * we've got all required data ready. However, that data is different + * from final render kernels since we don't have BVH yet, so can't + * really use same semantic of arrays. + */ + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + for (size_t i = 0; i < mesh->num_triangles(); ++i) { + tri_prim_index[i + mesh->prim_offset] = 3 * (i + mesh->prim_offset); + } + } + } + } + else { + for (size_t i = 0; i < dscene->prim_index.size(); ++i) { + if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { + tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i]; + } + } + } + + /* Fill in all the arrays. 
*/ + if (tri_size != 0) { + /* normals */ + progress.set_status("Updating Mesh", "Computing normals"); + + uint *tri_shader = dscene->tri_shader.alloc(tri_size); + float4 *vnormal = dscene->tri_vnormal.alloc(vert_size); + uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size); + uint *tri_patch = dscene->tri_patch.alloc(tri_size); + float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size); + + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + mesh->pack_shaders(scene, &tri_shader[mesh->prim_offset]); + mesh->pack_normals(&vnormal[mesh->vert_offset]); + mesh->pack_verts(tri_prim_index, + &tri_vindex[mesh->prim_offset], + &tri_patch[mesh->prim_offset], + &tri_patch_uv[mesh->vert_offset], + mesh->vert_offset, + mesh->prim_offset); + if (progress.get_cancel()) + return; + } + } + + /* vertex coordinates */ + progress.set_status("Updating Mesh", "Copying Mesh to device"); + + dscene->tri_shader.copy_to_device(); + dscene->tri_vnormal.copy_to_device(); + dscene->tri_vindex.copy_to_device(); + dscene->tri_patch.copy_to_device(); + dscene->tri_patch_uv.copy_to_device(); + } + + if (curve_size != 0) { + progress.set_status("Updating Mesh", "Copying Strands to device"); + + float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size); + float4 *curves = dscene->curves.alloc(curve_size); + + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + hair->pack_curves(scene, + &curve_keys[hair->curvekey_offset], + &curves[hair->prim_offset], + hair->curvekey_offset); + if (progress.get_cancel()) + return; + } + } + + dscene->curve_keys.copy_to_device(); + dscene->curves.copy_to_device(); + } + + if (patch_size != 0) { + progress.set_status("Updating Mesh", "Copying Patches to device"); + + uint *patch_data = dscene->patches.alloc(patch_size); + + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::MESH) { + 
Mesh *mesh = static_cast<Mesh *>(geom); + mesh->pack_patches(&patch_data[mesh->patch_offset], + mesh->vert_offset, + mesh->face_offset, + mesh->corner_offset); + + if (mesh->patch_table) { + mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset], + mesh->patch_table_offset); + } + + if (progress.get_cancel()) + return; + } + } + + dscene->patches.copy_to_device(); + } + + if (for_displacement) { + float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3); + foreach (Geometry *geom, scene->geometry) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + for (size_t i = 0; i < mesh->num_triangles(); ++i) { + Mesh::Triangle t = mesh->get_triangle(i); + size_t offset = 3 * (i + mesh->prim_offset); + prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]); + prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]); + prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]); + } + } + } + dscene->prim_tri_verts.copy_to_device(); + } +} + +void GeometryManager::device_update_bvh(Device *device, + DeviceScene *dscene, + Scene *scene, + Progress &progress) +{ + /* bvh build */ + progress.set_status("Updating Scene BVH", "Building"); + + BVHParams bparams; + bparams.top_level = true; + bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout, + device->get_bvh_layout_mask()); + bparams.use_spatial_split = scene->params.use_bvh_spatial_split; + bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && + scene->params.use_bvh_unaligned_nodes; + bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps; + bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps; + bparams.bvh_type = scene->params.bvh_type; + bparams.curve_flags = dscene->data.curve.curveflags; + bparams.curve_subdivisions = dscene->data.curve.subdivisions; + + VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout."; + +#ifdef WITH_EMBREE + if (bparams.bvh_layout 
== BVH_LAYOUT_EMBREE) { + if (dscene->data.bvh.scene) { + BVHEmbree::destroy(dscene->data.bvh.scene); + } + } +#endif + + BVH *bvh = BVH::create(bparams, scene->geometry, scene->objects); + bvh->build(progress, &device->stats); + + if (progress.get_cancel()) { +#ifdef WITH_EMBREE + if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) { + if (dscene->data.bvh.scene) { + BVHEmbree::destroy(dscene->data.bvh.scene); + } + } +#endif + delete bvh; + return; + } + + /* copy to device */ + progress.set_status("Updating Scene BVH", "Copying BVH to device"); + + PackedBVH &pack = bvh->pack; + + if (pack.nodes.size()) { + dscene->bvh_nodes.steal_data(pack.nodes); + dscene->bvh_nodes.copy_to_device(); + } + if (pack.leaf_nodes.size()) { + dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes); + dscene->bvh_leaf_nodes.copy_to_device(); + } + if (pack.object_node.size()) { + dscene->object_node.steal_data(pack.object_node); + dscene->object_node.copy_to_device(); + } + if (pack.prim_tri_index.size()) { + dscene->prim_tri_index.steal_data(pack.prim_tri_index); + dscene->prim_tri_index.copy_to_device(); + } + if (pack.prim_tri_verts.size()) { + dscene->prim_tri_verts.steal_data(pack.prim_tri_verts); + dscene->prim_tri_verts.copy_to_device(); + } + if (pack.prim_type.size()) { + dscene->prim_type.steal_data(pack.prim_type); + dscene->prim_type.copy_to_device(); + } + if (pack.prim_visibility.size()) { + dscene->prim_visibility.steal_data(pack.prim_visibility); + dscene->prim_visibility.copy_to_device(); + } + if (pack.prim_index.size()) { + dscene->prim_index.steal_data(pack.prim_index); + dscene->prim_index.copy_to_device(); + } + if (pack.prim_object.size()) { + dscene->prim_object.steal_data(pack.prim_object); + dscene->prim_object.copy_to_device(); + } + if (pack.prim_time.size()) { + dscene->prim_time.steal_data(pack.prim_time); + dscene->prim_time.copy_to_device(); + } + + dscene->data.bvh.root = pack.root_index; + dscene->data.bvh.bvh_layout = bparams.bvh_layout; + 
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0); + + bvh->copy_to_device(progress, dscene); + + delete bvh; +} + +void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Progress &progress) +{ + if (!need_update && !need_flags_update) { + return; + } + + progress.set_status("Updating Meshes Flags"); + + /* Update flags. */ + bool volume_images_updated = false; + + foreach (Geometry *geom, scene->geometry) { + geom->has_volume = false; + + foreach (const Shader *shader, geom->used_shaders) { + if (shader->has_volume) { + geom->has_volume = true; + } + if (shader->has_surface_bssrdf) { + geom->has_surface_bssrdf = true; + } + } + + if (need_update && geom->has_volume && geom->type == Geometry::MESH) { + /* Create volume meshes if there is voxel data. */ + if (geom->has_voxel_attributes()) { + if (!volume_images_updated) { + progress.set_status("Updating Meshes Volume Bounds"); + device_update_volume_images(device, scene, progress); + volume_images_updated = true; + } + + Mesh *mesh = static_cast<Mesh *>(geom); + create_volume_mesh(mesh, progress); + } + } + } + + need_flags_update = false; +} + +void GeometryManager::device_update_displacement_images(Device *device, + Scene *scene, + Progress &progress) +{ + progress.set_status("Updating Displacement Images"); + TaskPool pool; + ImageManager *image_manager = scene->image_manager; + set<int> bump_images; + foreach (Geometry *geom, scene->geometry) { + if (geom->need_update) { + foreach (Shader *shader, geom->used_shaders) { + if (!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) { + continue; + } + foreach (ShaderNode *node, shader->graph->nodes) { + if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) { + continue; + } + + ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode *>(node); + for (int i = 0; i < image_node->handle.num_tiles(); i++) { + const int slot = image_node->handle.svm_slot(i); + if (slot != -1) { + 
bump_images.insert(slot); + } + } + } + } + } + } + foreach (int slot, bump_images) { + pool.push(function_bind( + &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress)); + } + pool.wait_work(); +} + +void GeometryManager::device_update_volume_images(Device *device, Scene *scene, Progress &progress) +{ + progress.set_status("Updating Volume Images"); + TaskPool pool; + ImageManager *image_manager = scene->image_manager; + set<int> volume_images; + + foreach (Geometry *geom, scene->geometry) { + if (!geom->need_update) { + continue; + } + + foreach (Attribute &attr, geom->attributes.attributes) { + if (attr.element != ATTR_ELEMENT_VOXEL) { + continue; + } + + ImageHandle &handle = attr.data_voxel(); + const int slot = handle.svm_slot(); + if (slot != -1) { + volume_images.insert(slot); + } + } + } + + foreach (int slot, volume_images) { + pool.push(function_bind( + &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress)); + } + pool.wait_work(); +} + +void GeometryManager::device_update(Device *device, + DeviceScene *dscene, + Scene *scene, + Progress &progress) +{ + if (!need_update) + return; + + VLOG(1) << "Total " << scene->geometry.size() << " meshes."; + + bool true_displacement_used = false; + size_t total_tess_needed = 0; + + foreach (Geometry *geom, scene->geometry) { + foreach (Shader *shader, geom->used_shaders) { + if (shader->need_update_geometry) + geom->need_update = true; + } + + if (geom->need_update && geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + + /* Update normals. */ + mesh->add_face_normals(); + mesh->add_vertex_normals(); + + if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) { + mesh->add_undisplaced(); + } + + /* Test if we need tessellation. */ + if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 && + mesh->subd_params) { + total_tess_needed++; + } + + /* Test if we need displacement. 
*/ + if (mesh->has_true_displacement()) { + true_displacement_used = true; + } + + if (progress.get_cancel()) + return; + } + } + + /* Tessellate meshes that are using subdivision */ + if (total_tess_needed) { + Camera *dicing_camera = scene->dicing_camera; + dicing_camera->update(scene); + + size_t i = 0; + foreach (Geometry *geom, scene->geometry) { + if (!(geom->need_update && geom->type == Geometry::MESH)) { + continue; + } + + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 && + mesh->subd_params) { + string msg = "Tessellating "; + if (mesh->name == "") + msg += string_printf("%u/%u", (uint)(i + 1), (uint)total_tess_needed); + else + msg += string_printf( + "%s %u/%u", mesh->name.c_str(), (uint)(i + 1), (uint)total_tess_needed); + + progress.set_status("Updating Mesh", msg); + + mesh->subd_params->camera = dicing_camera; + DiagSplit dsplit(*mesh->subd_params); + mesh->tessellate(&dsplit); + + i++; + + if (progress.get_cancel()) + return; + } + } + } + + /* Update images needed for true displacement. */ + bool old_need_object_flags_update = false; + if (true_displacement_used) { + VLOG(1) << "Updating images used for true displacement."; + device_update_displacement_images(device, scene, progress); + old_need_object_flags_update = scene->object_manager->need_flags_update; + scene->object_manager->device_update_flags(device, dscene, scene, progress, false); + } + + /* Device update. */ + device_free(device, dscene); + + mesh_calc_offset(scene); + if (true_displacement_used) { + device_update_mesh(device, dscene, scene, true, progress); + } + if (progress.get_cancel()) + return; + + device_update_attributes(device, dscene, scene, progress); + if (progress.get_cancel()) + return; + + /* Update displacement. 
*/ + bool displacement_done = false; + size_t num_bvh = 0; + BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout, + device->get_bvh_layout_mask()); + + foreach (Geometry *geom, scene->geometry) { + if (geom->need_update) { + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (displace(device, dscene, scene, mesh, progress)) { + displacement_done = true; + } + } + + if (geom->need_build_bvh(bvh_layout)) { + num_bvh++; + } + } + + if (progress.get_cancel()) + return; + } + + /* Device re-update after displacement. */ + if (displacement_done) { + device_free(device, dscene); + + device_update_attributes(device, dscene, scene, progress); + if (progress.get_cancel()) + return; + } + + TaskPool pool; + + size_t i = 0; + foreach (Geometry *geom, scene->geometry) { + if (geom->need_update) { + pool.push(function_bind( + &Geometry::compute_bvh, geom, device, dscene, &scene->params, &progress, i, num_bvh)); + if (geom->need_build_bvh(bvh_layout)) { + i++; + } + } + } + + TaskPool::Summary summary; + pool.wait_work(&summary); + VLOG(2) << "Objects BVH build pool statistics:\n" << summary.full_report(); + + foreach (Shader *shader, scene->shaders) { + shader->need_update_geometry = false; + } + + Scene::MotionType need_motion = scene->need_motion(); + bool motion_blur = need_motion == Scene::MOTION_BLUR; + + /* Update objects. */ + vector<Object *> volume_objects; + foreach (Object *object, scene->objects) { + object->compute_bounds(motion_blur); + } + + if (progress.get_cancel()) + return; + + device_update_bvh(device, dscene, scene, progress); + if (progress.get_cancel()) + return; + + device_update_mesh(device, dscene, scene, false, progress); + if (progress.get_cancel()) + return; + + need_update = false; + + if (true_displacement_used) { + /* Re-tag flags for update, so they're re-evaluated + * for meshes with correct bounding boxes. 
+ * + * This wouldn't cause wrong results, just true + * displacement might be less optimal ot calculate. + */ + scene->object_manager->need_flags_update = old_need_object_flags_update; + } +} + +void GeometryManager::device_free(Device *device, DeviceScene *dscene) +{ + dscene->bvh_nodes.free(); + dscene->bvh_leaf_nodes.free(); + dscene->object_node.free(); + dscene->prim_tri_verts.free(); + dscene->prim_tri_index.free(); + dscene->prim_type.free(); + dscene->prim_visibility.free(); + dscene->prim_index.free(); + dscene->prim_object.free(); + dscene->prim_time.free(); + dscene->tri_shader.free(); + dscene->tri_vnormal.free(); + dscene->tri_vindex.free(); + dscene->tri_patch.free(); + dscene->tri_patch_uv.free(); + dscene->curves.free(); + dscene->curve_keys.free(); + dscene->patches.free(); + dscene->attributes_map.free(); + dscene->attributes_float.free(); + dscene->attributes_float2.free(); + dscene->attributes_float3.free(); + dscene->attributes_uchar4.free(); + + /* Signal for shaders like displacement not to do ray tracing. 
*/ + dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE; + +#ifdef WITH_OSL + OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + + if (og) { + og->object_name_map.clear(); + og->attribute_map.clear(); + og->object_names.clear(); + } +#else + (void)device; +#endif +} + +void GeometryManager::tag_update(Scene *scene) +{ + need_update = true; + scene->object_manager->need_update = true; +} + +void GeometryManager::collect_statistics(const Scene *scene, RenderStats *stats) +{ + foreach (Geometry *geometry, scene->geometry) { + stats->mesh.geometry.add_entry( + NamedSizeEntry(string(geometry->name.c_str()), geometry->get_total_size_in_bytes())); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h new file mode 100644 index 00000000000..b0284304843 --- /dev/null +++ b/intern/cycles/render/geometry.h @@ -0,0 +1,205 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __GEOMETRY_H__ +#define __GEOMETRY_H__ + +#include "graph/node.h" + +#include "bvh/bvh_params.h" + +#include "render/attribute.h" + +#include "util/util_boundbox.h" +#include "util/util_set.h" +#include "util/util_transform.h" +#include "util/util_types.h" +#include "util/util_vector.h" + +CCL_NAMESPACE_BEGIN + +class BVH; +class Device; +class DeviceScene; +class Mesh; +class Progress; +class RenderStats; +class Scene; +class SceneParams; +class Shader; + +/* Geometry + * + * Base class for geometric types like Mesh and Hair. */ + +class Geometry : public Node { + public: + NODE_ABSTRACT_DECLARE + + enum Type { + MESH, + HAIR, + }; + + Type type; + + /* Attributes */ + AttributeSet attributes; + + /* Shaders */ + vector<Shader *> used_shaders; + + /* Transform */ + BoundBox bounds; + bool transform_applied; + bool transform_negative_scaled; + Transform transform_normal; + + /* Motion Blur */ + uint motion_steps; + bool use_motion_blur; + + /* Maximum number of motion steps supported (due to Embree). */ + static const uint MAX_MOTION_STEPS = 129; + + /* BVH */ + BVH *bvh; + size_t attr_map_offset; + size_t prim_offset; + size_t optix_prim_offset; + + /* Shader Properties */ + bool has_volume; /* Set in the device_update_flags(). */ + bool has_surface_bssrdf; /* Set in the device_update_flags(). 
*/ + + /* Update Flags */ + bool need_update; + bool need_update_rebuild; + + /* Constructor/Destructor */ + explicit Geometry(const NodeType *node_type, const Type type); + virtual ~Geometry(); + + /* Geometry */ + virtual void clear(); + virtual void compute_bounds() = 0; + virtual void apply_transform(const Transform &tfm, const bool apply_to_motion) = 0; + + /* Attribute Requests */ + bool need_attribute(Scene *scene, AttributeStandard std); + bool need_attribute(Scene *scene, ustring name); + + /* UDIM */ + virtual void get_uv_tiles(ustring map, unordered_set<int> &tiles) = 0; + + /* Convert between normalized -1..1 motion time and index in the + * VERTEX_MOTION attribute. */ + float motion_time(int step) const; + int motion_step(float time) const; + + /* BVH */ + void compute_bvh(Device *device, + DeviceScene *dscene, + SceneParams *params, + Progress *progress, + int n, + int total); + + /* Check whether the geometry should have own BVH built separately. Briefly, + * own BVH is needed for geometry, if: + * + * - It is instanced multiple times, so each instance object should share the + * same BVH tree. + * - Special ray intersection is needed, for example to limit subsurface rays + * to only the geometry itself. + * - The BVH layout requires the top level to only contain instances. + */ + bool need_build_bvh(BVHLayout layout) const; + + /* Test if the geometry should be treated as instanced. 
*/ + bool is_instanced() const; + + bool has_true_displacement() const; + bool has_motion_blur() const; + bool has_voxel_attributes() const; + + /* Updates */ + void tag_update(Scene *scene, bool rebuild); +}; + +/* Geometry Manager */ + +class GeometryManager { + public: + /* Update Flags */ + bool need_update; + bool need_flags_update; + + /* Constructor/Destructor */ + GeometryManager(); + ~GeometryManager(); + + /* Device Updates */ + void device_update_preprocess(Device *device, Scene *scene, Progress &progress); + void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); + void device_free(Device *device, DeviceScene *dscene); + + /* Updates */ + void tag_update(Scene *scene); + + /* Statistics */ + void collect_statistics(const Scene *scene, RenderStats *stats); + + protected: + bool displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress); + + void create_volume_mesh(Mesh *mesh, Progress &progress); + + /* Attributes */ + void update_osl_attributes(Device *device, + Scene *scene, + vector<AttributeRequestSet> &geom_attributes); + void update_svm_attributes(Device *device, + DeviceScene *dscene, + Scene *scene, + vector<AttributeRequestSet> &geom_attributes); + + /* Compute verts/triangles/curves offsets in global arrays. 
*/ + void mesh_calc_offset(Scene *scene); + + void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); + + void device_update_mesh(Device *device, + DeviceScene *dscene, + Scene *scene, + bool for_displacement, + Progress &progress); + + void device_update_attributes(Device *device, + DeviceScene *dscene, + Scene *scene, + Progress &progress); + + void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); + + void device_update_displacement_images(Device *device, Scene *scene, Progress &progress); + + void device_update_volume_images(Device *device, Scene *scene, Progress &progress); +}; + +CCL_NAMESPACE_END + +#endif /* __GEOMETRY_H__ */ diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index 0e520c700a7..d2db59894ea 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "render/attribute.h" #include "render/graph.h" +#include "render/attribute.h" +#include "render/constant_fold.h" #include "render/nodes.h" #include "render/scene.h" #include "render/shader.h" -#include "render/constant_fold.h" #include "util/util_algorithm.h" #include "util/util_foreach.h" diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp new file mode 100644 index 00000000000..3daa4cc1e35 --- /dev/null +++ b/intern/cycles/render/hair.cpp @@ -0,0 +1,487 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/hair.h" +#include "render/curves.h" +#include "render/scene.h" + +CCL_NAMESPACE_BEGIN + +/* Hair Curve */ + +void Hair::Curve::bounds_grow(const int k, + const float3 *curve_keys, + const float *curve_radius, + BoundBox &bounds) const +{ + float3 P[4]; + + P[0] = curve_keys[max(first_key + k - 1, first_key)]; + P[1] = curve_keys[first_key + k]; + P[2] = curve_keys[first_key + k + 1]; + P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)]; + + float3 lower; + float3 upper; + + curvebounds(&lower.x, &upper.x, P, 0); + curvebounds(&lower.y, &upper.y, P, 1); + curvebounds(&lower.z, &upper.z, P, 2); + + float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]); + + bounds.grow(lower, mr); + bounds.grow(upper, mr); +} + +void Hair::Curve::bounds_grow(const int k, + const float3 *curve_keys, + const float *curve_radius, + const Transform &aligned_space, + BoundBox &bounds) const +{ + float3 P[4]; + + P[0] = curve_keys[max(first_key + k - 1, first_key)]; + P[1] = curve_keys[first_key + k]; + P[2] = curve_keys[first_key + k + 1]; + P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)]; + + P[0] = transform_point(&aligned_space, P[0]); + P[1] = transform_point(&aligned_space, P[1]); + P[2] = transform_point(&aligned_space, P[2]); + P[3] = transform_point(&aligned_space, P[3]); + + float3 lower; + float3 upper; + + curvebounds(&lower.x, &upper.x, P, 0); + curvebounds(&lower.y, &upper.y, P, 1); + curvebounds(&lower.z, &upper.z, P, 2); + + float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]); + + bounds.grow(lower, mr); + bounds.grow(upper, mr); +} + +void Hair::Curve::bounds_grow(float4 keys[4], BoundBox &bounds) const +{ + float3 P[4] = { + float4_to_float3(keys[0]), + float4_to_float3(keys[1]), + float4_to_float3(keys[2]), + float4_to_float3(keys[3]), + }; + + float3 lower; 
+ float3 upper; + + curvebounds(&lower.x, &upper.x, P, 0); + curvebounds(&lower.y, &upper.y, P, 1); + curvebounds(&lower.z, &upper.z, P, 2); + + float mr = max(keys[1].w, keys[2].w); + + bounds.grow(lower, mr); + bounds.grow(upper, mr); +} + +void Hair::Curve::motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, + size_t k1, + float4 r_keys[2]) const +{ + /* Figure out which steps we need to fetch and their interpolation factor. */ + const size_t max_step = num_steps - 1; + const size_t step = min((int)(time * max_step), max_step - 1); + const float t = time * max_step - step; + /* Fetch vertex coordinates. */ + float4 curr_keys[2]; + float4 next_keys[2]; + keys_for_step( + curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step, k0, k1, curr_keys); + keys_for_step( + curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step + 1, k0, k1, next_keys); + /* Interpolate between steps. */ + r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0]; + r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1]; +} + +void Hair::Curve::cardinal_motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, + size_t k1, + size_t k2, + size_t k3, + float4 r_keys[4]) const +{ + /* Figure out which steps we need to fetch and their interpolation factor. */ + const size_t max_step = num_steps - 1; + const size_t step = min((int)(time * max_step), max_step - 1); + const float t = time * max_step - step; + /* Fetch vertex coordinates. 
*/ + float4 curr_keys[4]; + float4 next_keys[4]; + cardinal_keys_for_step(curve_keys, + curve_radius, + key_steps, + num_curve_keys, + num_steps, + step, + k0, + k1, + k2, + k3, + curr_keys); + cardinal_keys_for_step(curve_keys, + curve_radius, + key_steps, + num_curve_keys, + num_steps, + step + 1, + k0, + k1, + k2, + k3, + next_keys); + /* Interpolate between steps. */ + r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0]; + r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1]; + r_keys[2] = (1.0f - t) * curr_keys[2] + t * next_keys[2]; + r_keys[3] = (1.0f - t) * curr_keys[3] + t * next_keys[3]; +} + +void Hair::Curve::keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, + size_t k1, + float4 r_keys[2]) const +{ + k0 = max(k0, 0); + k1 = min(k1, num_keys - 1); + const size_t center_step = ((num_steps - 1) / 2); + if (step == center_step) { + /* Center step: regular key location. */ + /* TODO(sergey): Consider adding make_float4(float3, float) + * function. + */ + r_keys[0] = make_float4(curve_keys[first_key + k0].x, + curve_keys[first_key + k0].y, + curve_keys[first_key + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(curve_keys[first_key + k1].x, + curve_keys[first_key + k1].y, + curve_keys[first_key + k1].z, + curve_radius[first_key + k1]); + } + else { + /* Center step is not stored in this array. 
*/ + if (step > center_step) { + step--; + } + const size_t offset = first_key + step * num_curve_keys; + r_keys[0] = make_float4(key_steps[offset + k0].x, + key_steps[offset + k0].y, + key_steps[offset + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(key_steps[offset + k1].x, + key_steps[offset + k1].y, + key_steps[offset + k1].z, + curve_radius[first_key + k1]); + } +} + +void Hair::Curve::cardinal_keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, + size_t k1, + size_t k2, + size_t k3, + float4 r_keys[4]) const +{ + k0 = max(k0, 0); + k3 = min(k3, num_keys - 1); + const size_t center_step = ((num_steps - 1) / 2); + if (step == center_step) { + /* Center step: regular key location. */ + r_keys[0] = make_float4(curve_keys[first_key + k0].x, + curve_keys[first_key + k0].y, + curve_keys[first_key + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(curve_keys[first_key + k1].x, + curve_keys[first_key + k1].y, + curve_keys[first_key + k1].z, + curve_radius[first_key + k1]); + r_keys[2] = make_float4(curve_keys[first_key + k2].x, + curve_keys[first_key + k2].y, + curve_keys[first_key + k2].z, + curve_radius[first_key + k2]); + r_keys[3] = make_float4(curve_keys[first_key + k3].x, + curve_keys[first_key + k3].y, + curve_keys[first_key + k3].z, + curve_radius[first_key + k3]); + } + else { + /* Center step is not stored in this array. 
*/ + if (step > center_step) { + step--; + } + const size_t offset = first_key + step * num_curve_keys; + r_keys[0] = make_float4(key_steps[offset + k0].x, + key_steps[offset + k0].y, + key_steps[offset + k0].z, + curve_radius[first_key + k0]); + r_keys[1] = make_float4(key_steps[offset + k1].x, + key_steps[offset + k1].y, + key_steps[offset + k1].z, + curve_radius[first_key + k1]); + r_keys[2] = make_float4(key_steps[offset + k2].x, + key_steps[offset + k2].y, + key_steps[offset + k2].z, + curve_radius[first_key + k2]); + r_keys[3] = make_float4(key_steps[offset + k3].x, + key_steps[offset + k3].y, + key_steps[offset + k3].z, + curve_radius[first_key + k3]); + } +} + +/* Hair */ + +NODE_DEFINE(Hair) +{ + NodeType *type = NodeType::add("hair", create, NodeType::NONE, Geometry::node_base_type); + + SOCKET_POINT_ARRAY(curve_keys, "Curve Keys", array<float3>()); + SOCKET_FLOAT_ARRAY(curve_radius, "Curve Radius", array<float>()); + SOCKET_INT_ARRAY(curve_first_key, "Curve First Key", array<int>()); + SOCKET_INT_ARRAY(curve_shader, "Curve Shader", array<int>()); + + return type; +} + +Hair::Hair() : Geometry(node_type, Geometry::HAIR) +{ + curvekey_offset = 0; +} + +Hair::~Hair() +{ +} + +void Hair::resize_curves(int numcurves, int numkeys) +{ + curve_keys.resize(numkeys); + curve_radius.resize(numkeys); + curve_first_key.resize(numcurves); + curve_shader.resize(numcurves); + + attributes.resize(); +} + +void Hair::reserve_curves(int numcurves, int numkeys) +{ + curve_keys.reserve(numkeys); + curve_radius.reserve(numkeys); + curve_first_key.reserve(numcurves); + curve_shader.reserve(numcurves); + + attributes.resize(true); +} + +void Hair::clear() +{ + Geometry::clear(); + + curve_keys.clear(); + curve_radius.clear(); + curve_first_key.clear(); + curve_shader.clear(); + + attributes.clear(); +} + +void Hair::add_curve_key(float3 co, float radius) +{ + curve_keys.push_back_reserved(co); + curve_radius.push_back_reserved(radius); +} + +void Hair::add_curve(int first_key, 
int shader) +{ + curve_first_key.push_back_reserved(first_key); + curve_shader.push_back_reserved(shader); +} + +void Hair::copy_center_to_motion_step(const int motion_step) +{ + Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_mP) { + float3 *keys = &curve_keys[0]; + size_t numkeys = curve_keys.size(); + memcpy(attr_mP->data_float3() + motion_step * numkeys, keys, sizeof(float3) * numkeys); + } +} + +void Hair::get_uv_tiles(ustring map, unordered_set<int> &tiles) +{ + Attribute *attr; + + if (map.empty()) { + attr = attributes.find(ATTR_STD_UV); + } + else { + attr = attributes.find(map); + } + + if (attr) { + attr->get_uv_tiles(this, ATTR_PRIM_GEOMETRY, tiles); + } +} + +void Hair::compute_bounds() +{ + BoundBox bnds = BoundBox::empty; + size_t curve_keys_size = curve_keys.size(); + + if (curve_keys_size > 0) { + for (size_t i = 0; i < curve_keys_size; i++) + bnds.grow(curve_keys[i], curve_radius[i]); + + Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (use_motion_blur && curve_attr) { + size_t steps_size = curve_keys.size() * (motion_steps - 1); + float3 *key_steps = curve_attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + bnds.grow(key_steps[i]); + } + + if (!bnds.valid()) { + bnds = BoundBox::empty; + + /* skip nan or inf coordinates */ + for (size_t i = 0; i < curve_keys_size; i++) + bnds.grow_safe(curve_keys[i], curve_radius[i]); + + if (use_motion_blur && curve_attr) { + size_t steps_size = curve_keys.size() * (motion_steps - 1); + float3 *key_steps = curve_attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + bnds.grow_safe(key_steps[i]); + } + } + } + + if (!bnds.valid()) { + /* empty mesh */ + bnds.grow(make_float3(0.0f, 0.0f, 0.0f)); + } + + bounds = bnds; +} + +void Hair::apply_transform(const Transform &tfm, const bool apply_to_motion) +{ + /* compute uniform scale */ + float3 c0 = transform_get_column(&tfm, 0); + float3 c1 = transform_get_column(&tfm, 1); + 
float3 c2 = transform_get_column(&tfm, 2); + float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f / 3.0f); + + /* apply transform to curve keys */ + for (size_t i = 0; i < curve_keys.size(); i++) { + float3 co = transform_point(&tfm, curve_keys[i]); + float radius = curve_radius[i] * scalar; + + /* scale for curve radius is only correct for uniform scale */ + curve_keys[i] = co; + curve_radius[i] = radius; + } + + if (apply_to_motion) { + Attribute *curve_attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if (curve_attr) { + /* apply transform to motion curve keys */ + size_t steps_size = curve_keys.size() * (motion_steps - 1); + float4 *key_steps = curve_attr->data_float4(); + + for (size_t i = 0; i < steps_size; i++) { + float3 co = transform_point(&tfm, float4_to_float3(key_steps[i])); + float radius = key_steps[i].w * scalar; + + /* scale for curve radius is only correct for uniform scale */ + key_steps[i] = float3_to_float4(co); + key_steps[i].w = radius; + } + } + } +} + +void Hair::pack_curves(Scene *scene, + float4 *curve_key_co, + float4 *curve_data, + size_t curvekey_offset) +{ + size_t curve_keys_size = curve_keys.size(); + + /* pack curve keys */ + if (curve_keys_size) { + float3 *keys_ptr = curve_keys.data(); + float *radius_ptr = curve_radius.data(); + + for (size_t i = 0; i < curve_keys_size; i++) + curve_key_co[i] = make_float4(keys_ptr[i].x, keys_ptr[i].y, keys_ptr[i].z, radius_ptr[i]); + } + + /* pack curve segments */ + size_t curve_num = num_curves(); + + for (size_t i = 0; i < curve_num; i++) { + Curve curve = get_curve(i); + int shader_id = curve_shader[i]; + Shader *shader = (shader_id < used_shaders.size()) ? 
used_shaders[shader_id] : + scene->default_surface; + shader_id = scene->shader_manager->get_shader_id(shader, false); + + curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset), + __int_as_float(curve.num_keys), + __int_as_float(shader_id), + 0.0f); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/hair.h b/intern/cycles/render/hair.h new file mode 100644 index 00000000000..79f77a78753 --- /dev/null +++ b/intern/cycles/render/hair.h @@ -0,0 +1,151 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __HAIR_H__ +#define __HAIR_H__ + +#include "render/geometry.h" + +CCL_NAMESPACE_BEGIN + +class Hair : public Geometry { + public: + NODE_DECLARE + + /* Hair Curve */ + struct Curve { + int first_key; + int num_keys; + + int num_segments() const + { + return num_keys - 1; + } + + void bounds_grow(const int k, + const float3 *curve_keys, + const float *curve_radius, + BoundBox &bounds) const; + void bounds_grow(float4 keys[4], BoundBox &bounds) const; + void bounds_grow(const int k, + const float3 *curve_keys, + const float *curve_radius, + const Transform &aligned_space, + BoundBox &bounds) const; + + void motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, + size_t k1, + float4 r_keys[2]) const; + void cardinal_motion_keys(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + float time, + size_t k0, + size_t k1, + size_t k2, + size_t k3, + float4 r_keys[4]) const; + + void keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, + size_t k1, + float4 r_keys[2]) const; + void cardinal_keys_for_step(const float3 *curve_keys, + const float *curve_radius, + const float3 *key_steps, + size_t num_curve_keys, + size_t num_steps, + size_t step, + size_t k0, + size_t k1, + size_t k2, + size_t k3, + float4 r_keys[4]) const; + }; + + array<float3> curve_keys; + array<float> curve_radius; + array<int> curve_first_key; + array<int> curve_shader; + + /* BVH */ + size_t curvekey_offset; + + /* Constructor/Destructor */ + Hair(); + ~Hair(); + + /* Geometry */ + void clear() override; + + void resize_curves(int numcurves, int numkeys); + void reserve_curves(int numcurves, int numkeys); + void add_curve_key(float3 loc, float radius); + void add_curve(int first_key, int 
shader); + + void copy_center_to_motion_step(const int motion_step); + + void compute_bounds() override; + void apply_transform(const Transform &tfm, const bool apply_to_motion) override; + + /* Curves */ + Curve get_curve(size_t i) const + { + int first = curve_first_key[i]; + int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] : curve_keys.size(); + + Curve curve = {first, next_first - first}; + return curve; + } + + size_t num_keys() const + { + return curve_keys.size(); + } + + size_t num_curves() const + { + return curve_first_key.size(); + } + + size_t num_segments() const + { + return curve_keys.size() - curve_first_key.size(); + } + + /* UDIM */ + void get_uv_tiles(ustring map, unordered_set<int> &tiles) override; + + /* BVH */ + void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset); +}; + +CCL_NAMESPACE_END + +#endif /* __HAIR_H__ */ diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 212a867f9cd..67ed1176171 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -17,10 +17,12 @@ #include "render/image.h" #include "device/device.h" #include "render/colorspace.h" +#include "render/image_oiio.h" #include "render/scene.h" #include "render/stats.h" #include "util/util_foreach.h" +#include "util/util_image.h" #include "util/util_image_impl.h" #include "util/util_logging.h" #include "util/util_path.h" @@ -50,21 +52,6 @@ bool isfinite(uint16_t /*value*/) return true; } -/* The lower three bits of a device texture slot number indicate its type. - * These functions convert the slot ids from ImageManager "images" ones - * to device ones and vice verse. 
- */ -int type_index_to_flattened_slot(int slot, ImageDataType type) -{ - return (slot << IMAGE_DATA_TYPE_SHIFT) | (type); -} - -int flattened_slot_to_type_index(int flat_slot, ImageDataType *type) -{ - *type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK); - return flat_slot >> IMAGE_DATA_TYPE_SHIFT; -} - const char *name_from_type(ImageDataType type) { switch (type) { @@ -94,342 +81,352 @@ const char *name_from_type(ImageDataType type) } // namespace -ImageManager::ImageManager(const DeviceInfo &info) +/* Image Handle */ + +ImageHandle::ImageHandle() : manager(NULL) { - need_update = true; - osl_texture_system = NULL; - animation_frame = 0; +} - /* Set image limits */ - max_num_images = TEX_NUM_MAX; - has_half_images = info.has_half_images; +ImageHandle::ImageHandle(const ImageHandle &other) + : tile_slots(other.tile_slots), manager(other.manager) +{ + /* Increase image user count. */ + foreach (const int slot, tile_slots) { + manager->add_image_user(slot); + } +} + +ImageHandle &ImageHandle::operator=(const ImageHandle &other) +{ + clear(); + manager = other.manager; + tile_slots = other.tile_slots; - for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - tex_num_images[type] = 0; + foreach (const int slot, tile_slots) { + manager->add_image_user(slot); } + + return *this; } -ImageManager::~ImageManager() +ImageHandle::~ImageHandle() +{ + clear(); +} + +void ImageHandle::clear() { - for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) - assert(!images[type][slot]); + foreach (const int slot, tile_slots) { + manager->remove_image_user(slot); } + + tile_slots.clear(); + manager = NULL; } -void ImageManager::set_osl_texture_system(void *texture_system) +bool ImageHandle::empty() { - osl_texture_system = texture_system; + return tile_slots.empty(); } -bool ImageManager::set_animation_frame_update(int frame) +int ImageHandle::num_tiles() { - if (frame != animation_frame) { - 
animation_frame = frame; + return tile_slots.size(); +} - for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) { - if (images[type][slot] && images[type][slot]->animated) - return true; - } - } +ImageMetaData ImageHandle::metadata() +{ + if (tile_slots.empty()) { + return ImageMetaData(); } - return false; + ImageManager::Image *img = manager->images[tile_slots.front()]; + manager->load_image_metadata(img); + return img->metadata; } -device_memory *ImageManager::image_memory(int flat_slot) +int ImageHandle::svm_slot(const int tile_index) const { - ImageDataType type; - int slot = flattened_slot_to_type_index(flat_slot, &type); + if (tile_index >= tile_slots.size()) { + return -1; + } - Image *img = images[type][slot]; + if (manager->osl_texture_system) { + ImageManager::Image *img = manager->images[tile_slots[tile_index]]; + if (!img->loader->osl_filepath().empty()) { + return -1; + } + } - return img->mem; + return tile_slots[tile_index]; } -bool ImageManager::get_image_metadata(int flat_slot, ImageMetaData &metadata) +device_texture *ImageHandle::image_memory(const int tile_index) const { - if (flat_slot == -1) { - return false; + if (tile_index >= tile_slots.size()) { + return NULL; } - ImageDataType type; - int slot = flattened_slot_to_type_index(flat_slot, &type); + ImageManager::Image *img = manager->images[tile_slots[tile_index]]; + return img ? 
img->mem : NULL; +} - Image *img = images[type][slot]; - if (img) { - metadata = img->metadata; - return true; - } +bool ImageHandle::operator==(const ImageHandle &other) const +{ + return manager == other.manager && tile_slots == other.tile_slots; +} - return false; +/* Image MetaData */ + +ImageMetaData::ImageMetaData() + : channels(0), + width(0), + height(0), + depth(0), + type(IMAGE_DATA_NUM_TYPES), + colorspace(u_colorspace_raw), + colorspace_file_format(""), + use_transform_3d(false), + compress_as_srgb(false) +{ +} + +bool ImageMetaData::operator==(const ImageMetaData &other) const +{ + return channels == other.channels && width == other.width && height == other.height && + depth == other.depth && use_transform_3d == other.use_transform_3d && + (!use_transform_3d || transform_3d == other.transform_3d) && type == other.type && + colorspace == other.colorspace && compress_as_srgb == other.compress_as_srgb; +} + +bool ImageMetaData::is_float() const +{ + return (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4 || + type == IMAGE_DATA_TYPE_HALF || type == IMAGE_DATA_TYPE_HALF4); } -void ImageManager::metadata_detect_colorspace(ImageMetaData &metadata, const char *file_format) +void ImageMetaData::detect_colorspace() { /* Convert used specified color spaces to one we know how to handle. */ - metadata.colorspace = ColorSpaceManager::detect_known_colorspace( - metadata.colorspace, file_format, metadata.is_float || metadata.is_half); + colorspace = ColorSpaceManager::detect_known_colorspace( + colorspace, colorspace_file_format, is_float()); - if (metadata.colorspace == u_colorspace_raw) { + if (colorspace == u_colorspace_raw) { /* Nothing to do. */ } - else if (metadata.colorspace == u_colorspace_srgb) { + else if (colorspace == u_colorspace_srgb) { /* Keep sRGB colorspace stored as sRGB, to save memory and/or loading time * for the common case of 8bit sRGB images like PNG. 
*/ - metadata.compress_as_srgb = true; + compress_as_srgb = true; } else { /* Always compress non-raw 8bit images as scene linear + sRGB, as a * heuristic to keep memory usage the same without too much data loss * due to quantization in common cases. */ - metadata.compress_as_srgb = (metadata.type == IMAGE_DATA_TYPE_BYTE || - metadata.type == IMAGE_DATA_TYPE_BYTE4); + compress_as_srgb = (type == IMAGE_DATA_TYPE_BYTE || type == IMAGE_DATA_TYPE_BYTE4); /* If colorspace conversion needed, use half instead of short so we can * represent HDR values that might result from conversion. */ - if (metadata.type == IMAGE_DATA_TYPE_USHORT) { - metadata.type = IMAGE_DATA_TYPE_HALF; + if (type == IMAGE_DATA_TYPE_USHORT) { + type = IMAGE_DATA_TYPE_HALF; } - else if (metadata.type == IMAGE_DATA_TYPE_USHORT4) { - metadata.type = IMAGE_DATA_TYPE_HALF4; + else if (type == IMAGE_DATA_TYPE_USHORT4) { + type = IMAGE_DATA_TYPE_HALF4; } } } -bool ImageManager::get_image_metadata(const string &filename, - void *builtin_data, - ustring colorspace, - ImageMetaData &metadata) -{ - metadata = ImageMetaData(); - metadata.colorspace = colorspace; - - if (builtin_data) { - if (builtin_image_info_cb) { - builtin_image_info_cb(filename, builtin_data, metadata); - } - else { - return false; - } +/* Image Loader */ - if (metadata.is_float) { - metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT; - } - else { - metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE; - } +ImageLoader::ImageLoader() +{ +} - metadata_detect_colorspace(metadata, ""); +ustring ImageLoader::osl_filepath() const +{ + return ustring(); +} +bool ImageLoader::equals(const ImageLoader *a, const ImageLoader *b) +{ + if (a == NULL && b == NULL) { return true; } - - /* Perform preliminary checks, with meaningful logging. 
*/ - if (!path_exists(filename)) { - VLOG(1) << "File '" << filename << "' does not exist."; - return false; - } - if (path_is_directory(filename)) { - VLOG(1) << "File '" << filename << "' is a directory, can't use as image."; - return false; + else { + return (a && b && typeid(*a) == typeid(*b) && a->equals(*b)); } +} - unique_ptr<ImageInput> in(ImageInput::create(filename)); +/* Image Manager */ - if (!in) { - return false; - } +ImageManager::ImageManager(const DeviceInfo &info) +{ + need_update = true; + osl_texture_system = NULL; + animation_frame = 0; - ImageSpec spec; - if (!in->open(filename, spec)) { - return false; - } + /* Set image limits */ + has_half_images = info.has_half_images; +} - metadata.width = spec.width; - metadata.height = spec.height; - metadata.depth = spec.depth; - metadata.compress_as_srgb = false; +ImageManager::~ImageManager() +{ + for (size_t slot = 0; slot < images.size(); slot++) + assert(!images[slot]); +} - /* Check the main format, and channel formats. */ - size_t channel_size = spec.format.basesize(); +void ImageManager::set_osl_texture_system(void *texture_system) +{ + osl_texture_system = texture_system; +} - if (spec.format.is_floating_point()) { - metadata.is_float = true; - } +bool ImageManager::set_animation_frame_update(int frame) +{ + if (frame != animation_frame) { + animation_frame = frame; - for (size_t channel = 0; channel < spec.channelformats.size(); channel++) { - channel_size = max(channel_size, spec.channelformats[channel].basesize()); - if (spec.channelformats[channel].is_floating_point()) { - metadata.is_float = true; + for (size_t slot = 0; slot < images.size(); slot++) { + if (images[slot] && images[slot]->params.animated) + return true; } } - /* check if it's half float */ - if (spec.format == TypeDesc::HALF) { - metadata.is_half = true; - } - - /* set type and channels */ - metadata.channels = spec.nchannels; + return false; +} - if (metadata.is_half) { - metadata.type = (metadata.channels > 1) ? 
IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF; +void ImageManager::load_image_metadata(Image *img) +{ + if (!img->need_metadata) { + return; } - else if (metadata.is_float) { - metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT; + + thread_scoped_lock image_lock(img->mutex); + if (!img->need_metadata) { + return; } - else if (spec.format == TypeDesc::USHORT) { - metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_USHORT4 : IMAGE_DATA_TYPE_USHORT; + + ImageMetaData &metadata = img->metadata; + metadata = ImageMetaData(); + metadata.colorspace = img->params.colorspace; + + if (img->loader->load_metadata(metadata)) { + assert(metadata.type != IMAGE_DATA_NUM_TYPES); } else { - metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE; + metadata.type = IMAGE_DATA_TYPE_BYTE4; + } + + metadata.detect_colorspace(); + + /* No half textures on OpenCL, use full float instead. */ + if (!has_half_images) { + if (metadata.type == IMAGE_DATA_TYPE_HALF4) { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + } + else if (metadata.type == IMAGE_DATA_TYPE_HALF) { + metadata.type = IMAGE_DATA_TYPE_FLOAT; + } } - metadata_detect_colorspace(metadata, in->format_name()); + img->need_metadata = false; +} + +ImageHandle ImageManager::add_image(const string &filename, const ImageParams &params) +{ + const int slot = add_image_slot(new OIIOImageLoader(filename), params, false); - in->close(); + ImageHandle handle; + handle.tile_slots.push_back(slot); + handle.manager = this; + return handle; +} - return true; +ImageHandle ImageManager::add_image(const string &filename, + const ImageParams &params, + const vector<int> &tiles) +{ + ImageHandle handle; + handle.manager = this; + + foreach (int tile, tiles) { + string tile_filename = filename; + if (tile != 0) { + string_replace(tile_filename, "<UDIM>", string_printf("%04d", tile)); + } + const int slot = add_image_slot(new OIIOImageLoader(tile_filename), params, false); + 
handle.tile_slots.push_back(slot); + } + + return handle; } -static bool image_equals(ImageManager::Image *image, - const string &filename, - void *builtin_data, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace) +ImageHandle ImageManager::add_image(ImageLoader *loader, const ImageParams &params) { - return image->filename == filename && image->builtin_data == builtin_data && - image->interpolation == interpolation && image->extension == extension && - image->alpha_type == alpha_type && image->colorspace == colorspace; + const int slot = add_image_slot(loader, params, true); + + ImageHandle handle; + handle.tile_slots.push_back(slot); + handle.manager = this; + return handle; } -int ImageManager::add_image(const string &filename, - void *builtin_data, - bool animated, - float frame, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace, - ImageMetaData &metadata) +int ImageManager::add_image_slot(ImageLoader *loader, + const ImageParams &params, + const bool builtin) { Image *img; size_t slot; - get_image_metadata(filename, builtin_data, colorspace, metadata); - ImageDataType type = metadata.type; - thread_scoped_lock device_lock(device_mutex); - /* No half textures on OpenCL, use full float instead. */ - if (!has_half_images) { - if (type == IMAGE_DATA_TYPE_HALF4) { - type = IMAGE_DATA_TYPE_FLOAT4; - } - else if (type == IMAGE_DATA_TYPE_HALF) { - type = IMAGE_DATA_TYPE_FLOAT; - } - } - /* Fnd existing image. 
*/ - for (slot = 0; slot < images[type].size(); slot++) { - img = images[type][slot]; - if (img && - image_equals( - img, filename, builtin_data, interpolation, extension, alpha_type, colorspace)) { - if (img->frame != frame) { - img->frame = frame; - img->need_load = true; - } - if (img->alpha_type != alpha_type) { - img->alpha_type = alpha_type; - img->need_load = true; - } - if (img->colorspace != colorspace) { - img->colorspace = colorspace; - img->need_load = true; - } - if (!(img->metadata == metadata)) { - img->metadata = metadata; - img->need_load = true; - } + for (slot = 0; slot < images.size(); slot++) { + img = images[slot]; + if (img && ImageLoader::equals(img->loader, loader) && img->params == params) { img->users++; - return type_index_to_flattened_slot(slot, type); + delete loader; + return slot; } } /* Find free slot. */ - for (slot = 0; slot < images[type].size(); slot++) { - if (!images[type][slot]) + for (slot = 0; slot < images.size(); slot++) { + if (!images[slot]) break; } - /* Count if we're over the limit. - * Very unlikely, since max_num_images is insanely big. But better safe - * than sorry. - */ - int tex_count = 0; - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - tex_count += tex_num_images[type]; - } - if (tex_count > max_num_images) { - printf( - "ImageManager::add_image: Reached image limit (%d), " - "skipping '%s'\n", - max_num_images, - filename.c_str()); - return -1; - } - - if (slot == images[type].size()) { - images[type].resize(images[type].size() + 1); + if (slot == images.size()) { + images.resize(images.size() + 1); } /* Add new image. 
*/ img = new Image(); - img->filename = filename; - img->builtin_data = builtin_data; - img->metadata = metadata; - img->need_load = true; - img->animated = animated; - img->frame = frame; - img->interpolation = interpolation; - img->extension = extension; + img->params = params; + img->loader = loader; + img->need_metadata = true; + img->need_load = !(osl_texture_system && !img->loader->osl_filepath().empty()); + img->builtin = builtin; img->users = 1; - img->alpha_type = alpha_type; - img->colorspace = colorspace; img->mem = NULL; - images[type][slot] = img; - - ++tex_num_images[type]; + images[slot] = img; need_update = true; - return type_index_to_flattened_slot(slot, type); + return slot; } -void ImageManager::add_image_user(int flat_slot) +void ImageManager::add_image_user(int slot) { - ImageDataType type; - int slot = flattened_slot_to_type_index(flat_slot, &type); - - Image *image = images[type][slot]; + Image *image = images[slot]; assert(image && image->users >= 1); image->users++; } -void ImageManager::remove_image(int flat_slot) +void ImageManager::remove_image_user(int slot) { - ImageDataType type; - int slot = flattened_slot_to_type_index(flat_slot, &type); - - Image *image = images[type][slot]; + Image *image = images[slot]; assert(image && image->users >= 1); /* decrement user count */ @@ -442,119 +439,20 @@ void ImageManager::remove_image(int flat_slot) need_update = true; } -void ImageManager::remove_image(const string &filename, - void *builtin_data, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace) -{ - size_t slot; - - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (slot = 0; slot < images[type].size(); slot++) { - if (images[type][slot] && image_equals(images[type][slot], - filename, - builtin_data, - interpolation, - extension, - alpha_type, - colorspace)) { - remove_image(type_index_to_flattened_slot(slot, (ImageDataType)type)); - return; - } - } - } -} - -/* 
TODO(sergey): Deduplicate with the iteration above, but make it pretty, - * without bunch of arguments passing around making code readability even - * more cluttered. - */ -void ImageManager::tag_reload_image(const string &filename, - void *builtin_data, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace) -{ - for (size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) { - if (images[type][slot] && image_equals(images[type][slot], - filename, - builtin_data, - interpolation, - extension, - alpha_type, - colorspace)) { - images[type][slot]->need_load = true; - break; - } - } - } -} - static bool image_associate_alpha(ImageManager::Image *img) { /* For typical RGBA images we let OIIO convert to associated alpha, * but some types we want to leave the RGB channels untouched. */ - return !(ColorSpaceManager::colorspace_is_data(img->colorspace) || - img->alpha_type == IMAGE_ALPHA_IGNORE || img->alpha_type == IMAGE_ALPHA_CHANNEL_PACKED); + return !(ColorSpaceManager::colorspace_is_data(img->params.colorspace) || + img->params.alpha_type == IMAGE_ALPHA_IGNORE || + img->params.alpha_type == IMAGE_ALPHA_CHANNEL_PACKED); } -bool ImageManager::file_load_image_generic(Image *img, unique_ptr<ImageInput> *in) +template<TypeDesc::BASETYPE FileFormat, typename StorageType> +bool ImageManager::file_load_image(Image *img, int texture_limit) { - if (img->filename == "") - return false; - - if (!img->builtin_data) { - /* NOTE: Error logging is done in meta data acquisition. 
*/ - if (!path_exists(img->filename) || path_is_directory(img->filename)) { - return false; - } - - /* load image from file through OIIO */ - *in = unique_ptr<ImageInput>(ImageInput::create(img->filename)); - - if (!*in) - return false; - - ImageSpec spec = ImageSpec(); - ImageSpec config = ImageSpec(); - - if (!image_associate_alpha(img)) { - config.attribute("oiio:UnassociatedAlpha", 1); - } - - if (!(*in)->open(img->filename, spec, config)) { - return false; - } - } - else { - /* load image using builtin images callbacks */ - if (!builtin_image_info_cb || !builtin_image_pixels_cb) - return false; - } - /* we only handle certain number of components */ if (!(img->metadata.channels >= 1 && img->metadata.channels <= 4)) { - if (*in) { - (*in)->close(); - } - return false; - } - - return true; -} - -template<TypeDesc::BASETYPE FileFormat, typename StorageType, typename DeviceType> -bool ImageManager::file_load_image(Image *img, - ImageDataType type, - int texture_limit, - device_vector<DeviceType> &tex_img) -{ - unique_ptr<ImageInput> in = NULL; - if (!file_load_image_generic(img, &in)) { return false; } @@ -580,7 +478,7 @@ bool ImageManager::file_load_image(Image *img, } else { thread_scoped_lock device_lock(device_mutex); - pixels = (StorageType *)tex_img.alloc(width, height, depth); + pixels = (StorageType *)img->mem->alloc(width, height, depth); } if (pixels == NULL) { @@ -588,90 +486,21 @@ bool ImageManager::file_load_image(Image *img, return false; } - bool cmyk = false; const size_t num_pixels = ((size_t)width) * height * depth; - if (in) { - /* Read pixels through OpenImageIO. 
*/ - StorageType *readpixels = pixels; - vector<StorageType> tmppixels; - if (components > 4) { - tmppixels.resize(((size_t)width) * height * components); - readpixels = &tmppixels[0]; - } - - if (depth <= 1) { - size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType); - in->read_image(FileFormat, - (uchar *)readpixels + (height - 1) * scanlinesize, - AutoStride, - -scanlinesize, - AutoStride); - } - else { - in->read_image(FileFormat, (uchar *)readpixels); - } - - if (components > 4) { - size_t dimensions = ((size_t)width) * height; - for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) { - pixels[i * 4 + 3] = tmppixels[i * components + 3]; - pixels[i * 4 + 2] = tmppixels[i * components + 2]; - pixels[i * 4 + 1] = tmppixels[i * components + 1]; - pixels[i * 4 + 0] = tmppixels[i * components + 0]; - } - tmppixels.clear(); - } - - cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4; - in->close(); - } - else { - /* Read pixels through callback. */ - if (FileFormat == TypeDesc::FLOAT) { - builtin_image_float_pixels_cb(img->filename, - img->builtin_data, - 0, /* TODO(lukas): Support tiles here? */ - (float *)&pixels[0], - num_pixels * components, - image_associate_alpha(img), - img->metadata.builtin_free_cache); - } - else if (FileFormat == TypeDesc::UINT8) { - builtin_image_pixels_cb(img->filename, - img->builtin_data, - 0, /* TODO(lukas): Support tiles here? */ - (uchar *)&pixels[0], - num_pixels * components, - image_associate_alpha(img), - img->metadata.builtin_free_cache); - } - else { - /* TODO(dingto): Support half for ImBuf. */ - } - } + img->loader->load_pixels( + img->metadata, pixels, num_pixels * components, image_associate_alpha(img)); /* The kernel can handle 1 and 4 channel images. Anything that is not a single * channel image is converted to RGBA format. 
*/ - bool is_rgba = (type == IMAGE_DATA_TYPE_FLOAT4 || type == IMAGE_DATA_TYPE_HALF4 || - type == IMAGE_DATA_TYPE_BYTE4 || type == IMAGE_DATA_TYPE_USHORT4); + bool is_rgba = (img->metadata.type == IMAGE_DATA_TYPE_FLOAT4 || + img->metadata.type == IMAGE_DATA_TYPE_HALF4 || + img->metadata.type == IMAGE_DATA_TYPE_BYTE4 || + img->metadata.type == IMAGE_DATA_TYPE_USHORT4); if (is_rgba) { const StorageType one = util_image_cast_from_float<StorageType>(1.0f); - if (cmyk) { - /* CMYK to RGBA. */ - for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) { - float c = util_image_cast_to_float(pixels[i * 4 + 0]); - float m = util_image_cast_to_float(pixels[i * 4 + 1]); - float y = util_image_cast_to_float(pixels[i * 4 + 2]); - float k = util_image_cast_to_float(pixels[i * 4 + 3]); - pixels[i * 4 + 0] = util_image_cast_from_float<StorageType>((1.0f - c) * (1.0f - k)); - pixels[i * 4 + 1] = util_image_cast_from_float<StorageType>((1.0f - m) * (1.0f - k)); - pixels[i * 4 + 2] = util_image_cast_from_float<StorageType>((1.0f - y) * (1.0f - k)); - pixels[i * 4 + 3] = one; - } - } - else if (components == 2) { + if (components == 2) { /* Grayscale + alpha to RGBA. */ for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) { pixels[i * 4 + 3] = pixels[i * 2 + 1]; @@ -700,7 +529,7 @@ bool ImageManager::file_load_image(Image *img, } /* Disable alpha if requested by the user. */ - if (img->alpha_type == IMAGE_ALPHA_IGNORE) { + if (img->params.alpha_type == IMAGE_ALPHA_IGNORE) { for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) { pixels[i * 4 + 3] = one; } @@ -710,7 +539,7 @@ bool ImageManager::file_load_image(Image *img, img->metadata.colorspace != u_colorspace_srgb) { /* Convert to scene linear. 
*/ ColorSpaceManager::to_scene_linear( - img->metadata.colorspace, pixels, width, height, depth, img->metadata.compress_as_srgb); + img->metadata.colorspace, pixels, num_pixels, img->metadata.compress_as_srgb); } } @@ -747,7 +576,8 @@ bool ImageManager::file_load_image(Image *img, while (max_size * scale_factor > texture_limit) { scale_factor *= 0.5f; } - VLOG(1) << "Scaling image " << img->filename << " by a factor of " << scale_factor << "."; + VLOG(1) << "Scaling image " << img->loader->name() << " by a factor of " << scale_factor + << "."; vector<StorageType> scaled_pixels; size_t scaled_width, scaled_height, scaled_depth; util_image_resize_pixels(pixels_storage, @@ -765,7 +595,7 @@ bool ImageManager::file_load_image(Image *img, { thread_scoped_lock device_lock(device_mutex); - texture_pixels = (StorageType *)tex_img.alloc(scaled_width, scaled_height, scaled_depth); + texture_pixels = (StorageType *)img->mem->alloc(scaled_width, scaled_height, scaled_depth); } memcpy(texture_pixels, &scaled_pixels[0], scaled_pixels.size() * sizeof(StorageType)); @@ -774,25 +604,23 @@ bool ImageManager::file_load_image(Image *img, return true; } -void ImageManager::device_load_image( - Device *device, Scene *scene, ImageDataType type, int slot, Progress *progress) +void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Progress *progress) { - if (progress->get_cancel()) + if (progress->get_cancel()) { return; + } - Image *img = images[type][slot]; + Image *img = images[slot]; - if (osl_texture_system && !img->builtin_data) - return; - - string filename = path_filename(images[type][slot]->filename); - progress->set_status("Updating Images", "Loading " + filename); + progress->set_status("Updating Images", "Loading " + img->loader->name()); const int texture_limit = scene->params.texture_limit; - /* Slot assignment */ - int flat_slot = type_index_to_flattened_slot(slot, type); - img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), 
flat_slot); + load_image_metadata(img); + ImageDataType type = img->metadata.type; + + /* Name for debugging. */ + img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type), slot); /* Free previous texture in slot. */ if (img->mem) { @@ -801,195 +629,131 @@ void ImageManager::device_load_image( img->mem = NULL; } + img->mem = new device_texture( + device, img->mem_name.c_str(), slot, type, img->params.interpolation, img->params.extension); + img->mem->info.use_transform_3d = img->metadata.use_transform_3d; + img->mem->info.transform_3d = img->metadata.transform_3d; + /* Create new texture. */ if (type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector<float4> *tex_img = new device_vector<float4>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::FLOAT, float>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - float *pixels = (float *)tex_img->alloc(1, 1); + float *pixels = (float *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; pixels[1] = TEX_IMAGE_MISSING_G; pixels[2] = TEX_IMAGE_MISSING_B; pixels[3] = TEX_IMAGE_MISSING_A; } - - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_FLOAT) { - device_vector<float> *tex_img = new device_vector<float>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::FLOAT, float>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - float *pixels = (float *)tex_img->alloc(1, 1); + float *pixels = (float *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; } - - 
img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_BYTE4) { - device_vector<uchar4> *tex_img = new device_vector<uchar4>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::UINT8, uchar>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uchar *pixels = (uchar *)tex_img->alloc(1, 1); + uchar *pixels = (uchar *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 255); pixels[1] = (TEX_IMAGE_MISSING_G * 255); pixels[2] = (TEX_IMAGE_MISSING_B * 255); pixels[3] = (TEX_IMAGE_MISSING_A * 255); } - - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_BYTE) { - device_vector<uchar> *tex_img = new device_vector<uchar>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::UINT8, uchar>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uchar *pixels = (uchar *)tex_img->alloc(1, 1); + uchar *pixels = (uchar *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 255); } - - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_HALF4) { - device_vector<half4> *tex_img = new device_vector<half4>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::HALF, 
half>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - half *pixels = (half *)tex_img->alloc(1, 1); + half *pixels = (half *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; pixels[1] = TEX_IMAGE_MISSING_G; pixels[2] = TEX_IMAGE_MISSING_B; pixels[3] = TEX_IMAGE_MISSING_A; } - - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_USHORT) { - device_vector<uint16_t> *tex_img = new device_vector<uint16_t>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1); + uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 65535); } - - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_USHORT4) { - device_vector<ushort4> *tex_img = new device_vector<ushort4>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - uint16_t *pixels = (uint16_t *)tex_img->alloc(1, 1); + uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1); pixels[0] = (TEX_IMAGE_MISSING_R * 65535); pixels[1] 
= (TEX_IMAGE_MISSING_G * 65535); pixels[2] = (TEX_IMAGE_MISSING_B * 65535); pixels[3] = (TEX_IMAGE_MISSING_A * 65535); } - - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - - thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); } else if (type == IMAGE_DATA_TYPE_HALF) { - device_vector<half> *tex_img = new device_vector<half>( - device, img->mem_name.c_str(), MEM_TEXTURE); - - if (!file_load_image<TypeDesc::HALF, half>(img, type, texture_limit, *tex_img)) { + if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) { /* on failure to load, we set a 1x1 pixels pink image */ thread_scoped_lock device_lock(device_mutex); - half *pixels = (half *)tex_img->alloc(1, 1); + half *pixels = (half *)img->mem->alloc(1, 1); pixels[0] = TEX_IMAGE_MISSING_R; } + } - img->mem = tex_img; - img->mem->interpolation = img->interpolation; - img->mem->extension = img->extension; - + { thread_scoped_lock device_lock(device_mutex); - tex_img->copy_to_device(); + img->mem->copy_to_device(); } + + /* Cleanup memory in image loader. 
*/ + img->loader->cleanup(); img->need_load = false; } -void ImageManager::device_free_image(Device *, ImageDataType type, int slot) +void ImageManager::device_free_image(Device *, int slot) { - Image *img = images[type][slot]; + Image *img = images[slot]; + if (img == NULL) { + return; + } - if (img) { - if (osl_texture_system && !img->builtin_data) { + if (osl_texture_system) { #ifdef WITH_OSL - ustring filename(images[type][slot]->filename); - ((OSL::TextureSystem *)osl_texture_system)->invalidate(filename); -#endif - } - - if (img->mem) { - thread_scoped_lock device_lock(device_mutex); - delete img->mem; + ustring filepath = img->loader->osl_filepath(); + if (!filepath.empty()) { + ((OSL::TextureSystem *)osl_texture_system)->invalidate(filepath); } +#endif + } - delete img; - images[type][slot] = NULL; - --tex_num_images[type]; + if (img->mem) { + thread_scoped_lock device_lock(device_mutex); + delete img->mem; } + + delete img->loader; + delete img; + images[slot] = NULL; } void ImageManager::device_update(Device *device, Scene *scene, Progress &progress) @@ -999,24 +763,14 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress &progres } TaskPool pool; - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) { - if (!images[type][slot]) - continue; - - if (images[type][slot]->users == 0) { - device_free_image(device, (ImageDataType)type, slot); - } - else if (images[type][slot]->need_load) { - if (!osl_texture_system || images[type][slot]->builtin_data) - pool.push(function_bind(&ImageManager::device_load_image, - this, - device, - scene, - (ImageDataType)type, - slot, - &progress)); - } + for (size_t slot = 0; slot < images.size(); slot++) { + Image *img = images[slot]; + if (img && img->users == 0) { + device_free_image(device, slot); + } + else if (img && img->need_load) { + pool.push( + function_bind(&ImageManager::device_load_image, this, device, scene, slot, &progress)); 
} } @@ -1025,23 +779,16 @@ void ImageManager::device_update(Device *device, Scene *scene, Progress &progres need_update = false; } -void ImageManager::device_update_slot(Device *device, - Scene *scene, - int flat_slot, - Progress *progress) +void ImageManager::device_update_slot(Device *device, Scene *scene, int slot, Progress *progress) { - ImageDataType type; - int slot = flattened_slot_to_type_index(flat_slot, &type); - - Image *image = images[type][slot]; - assert(image != NULL); + Image *img = images[slot]; + assert(img != NULL); - if (image->users == 0) { - device_free_image(device, type, slot); + if (img->users == 0) { + device_free_image(device, slot); } - else if (image->need_load) { - if (!osl_texture_system || image->builtin_data) - device_load_image(device, scene, type, slot, progress); + else if (img->need_load) { + device_load_image(device, scene, slot, progress); } } @@ -1054,22 +801,11 @@ void ImageManager::device_load_builtin(Device *device, Scene *scene, Progress &p } TaskPool pool; - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) { - if (!images[type][slot]) - continue; - - if (images[type][slot]->need_load) { - if (images[type][slot]->builtin_data) { - pool.push(function_bind(&ImageManager::device_load_image, - this, - device, - scene, - (ImageDataType)type, - slot, - &progress)); - } - } + for (size_t slot = 0; slot < images.size(); slot++) { + Image *img = images[slot]; + if (img && img->need_load && img->builtin) { + pool.push( + function_bind(&ImageManager::device_load_image, this, device, scene, slot, &progress)); } } @@ -1078,31 +814,27 @@ void ImageManager::device_load_builtin(Device *device, Scene *scene, Progress &p void ImageManager::device_free_builtin(Device *device) { - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) { - if (images[type][slot] && images[type][slot]->builtin_data) - 
device_free_image(device, (ImageDataType)type, slot); + for (size_t slot = 0; slot < images.size(); slot++) { + Image *img = images[slot]; + if (img && img->builtin) { + device_free_image(device, slot); } } } void ImageManager::device_free(Device *device) { - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - for (size_t slot = 0; slot < images[type].size(); slot++) { - device_free_image(device, (ImageDataType)type, slot); - } - images[type].clear(); + for (size_t slot = 0; slot < images.size(); slot++) { + device_free_image(device, slot); } + images.clear(); } void ImageManager::collect_statistics(RenderStats *stats) { - for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - foreach (const Image *image, images[type]) { - stats->image.textures.add_entry( - NamedSizeEntry(path_filename(image->filename), image->mem->memory_size())); - } + foreach (const Image *image, images) { + stats->image.textures.add_entry( + NamedSizeEntry(image->loader->name(), image->mem->memory_size())); } } diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index bc04a667953..00ab12afd7a 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -22,92 +22,157 @@ #include "render/colorspace.h" -#include "util/util_image.h" #include "util/util_string.h" #include "util/util_thread.h" +#include "util/util_transform.h" #include "util/util_unique_ptr.h" #include "util/util_vector.h" CCL_NAMESPACE_BEGIN class Device; +class ImageHandle; +class ImageKey; +class ImageMetaData; +class ImageManager; class Progress; class RenderStats; class Scene; class ColorSpaceProcessor; -class ImageMetaData { +/* Image Parameters */ +class ImageParams { public: - /* Must be set by image file or builtin callback. */ - bool is_float, is_half; - int channels; - size_t width, height, depth; - bool builtin_free_cache; - - /* Automatically set. 
*/ - ImageDataType type; + bool animated; + InterpolationType interpolation; + ExtensionType extension; + ImageAlphaType alpha_type; ustring colorspace; - bool compress_as_srgb; + float frame; - ImageMetaData() - : is_float(false), - is_half(false), - channels(0), - width(0), - height(0), - depth(0), - builtin_free_cache(false), - type((ImageDataType)0), + ImageParams() + : animated(false), + interpolation(INTERPOLATION_LINEAR), + extension(EXTENSION_CLIP), + alpha_type(IMAGE_ALPHA_AUTO), colorspace(u_colorspace_raw), - compress_as_srgb(false) + frame(0.0f) { } - bool operator==(const ImageMetaData &other) const + bool operator==(const ImageParams &other) const { - return is_float == other.is_float && is_half == other.is_half && channels == other.channels && - width == other.width && height == other.height && depth == other.depth && - type == other.type && colorspace == other.colorspace && - compress_as_srgb == other.compress_as_srgb; + return (animated == other.animated && interpolation == other.interpolation && + extension == other.extension && alpha_type == other.alpha_type && + colorspace == other.colorspace && frame == other.frame); } }; +/* Image MetaData + * + * Information about the image that is available before the image pixels are loaded. */ +class ImageMetaData { + public: + /* Set by ImageLoader.load_metadata(). */ + int channels; + size_t width, height, depth; + ImageDataType type; + + /* Optional color space, defaults to raw. */ + ustring colorspace; + const char *colorspace_file_format; + + /* Optional transform for 3D images. */ + bool use_transform_3d; + Transform transform_3d; + + /* Automatically set. */ + bool compress_as_srgb; + + ImageMetaData(); + bool operator==(const ImageMetaData &other) const; + bool is_float() const; + void detect_colorspace(); +}; + +/* Image loader base class, that can be subclassed to load image data + * from custom sources (file, memory, procedurally generated, etc). 
*/ +class ImageLoader { + public: + ImageLoader(); + virtual ~ImageLoader(){}; + + /* Load metadata without actual image yet, should be fast. */ + virtual bool load_metadata(ImageMetaData &metadata) = 0; + + /* Load actual image contents. */ + virtual bool load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) = 0; + + /* Name for logs and stats. */ + virtual string name() const = 0; + + /* Optional for OSL texture cache. */ + virtual ustring osl_filepath() const; + + /* Free any memory used for loading metadata and pixels. */ + virtual void cleanup(){}; + + /* Compare avoid loading the same image multiple times. */ + virtual bool equals(const ImageLoader &other) const = 0; + static bool equals(const ImageLoader *a, const ImageLoader *b); + + /* Work around for no RTTI. */ +}; + +/* Image Handle + * + * Access handle for image in the image manager. Multiple shader nodes may + * share the same image, and this class handles reference counting for that. */ +class ImageHandle { + public: + ImageHandle(); + ImageHandle(const ImageHandle &other); + ImageHandle &operator=(const ImageHandle &other); + ~ImageHandle(); + + bool operator==(const ImageHandle &other) const; + + void clear(); + + bool empty(); + int num_tiles(); + + ImageMetaData metadata(); + int svm_slot(const int tile_index = 0) const; + device_texture *image_memory(const int tile_index = 0) const; + + protected: + vector<int> tile_slots; + ImageManager *manager; + + friend class ImageManager; +}; + +/* Image Manager + * + * Handles loading and storage of all images in the scene. This includes 2D + * texture images and 3D volume images. 
*/ class ImageManager { public: explicit ImageManager(const DeviceInfo &info); ~ImageManager(); - int add_image(const string &filename, - void *builtin_data, - bool animated, - float frame, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace, - ImageMetaData &metadata); - void add_image_user(int flat_slot); - void remove_image(int flat_slot); - void remove_image(const string &filename, - void *builtin_data, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace); - void tag_reload_image(const string &filename, - void *builtin_data, - InterpolationType interpolation, - ExtensionType extension, - ImageAlphaType alpha_type, - ustring colorspace); - bool get_image_metadata(const string &filename, - void *builtin_data, - ustring colorspace, - ImageMetaData &metadata); - bool get_image_metadata(int flat_slot, ImageMetaData &metadata); + ImageHandle add_image(const string &filename, const ImageParams &params); + ImageHandle add_image(const string &filename, + const ImageParams &params, + const vector<int> &tiles); + ImageHandle add_image(ImageLoader *loader, const ImageParams &params); void device_update(Device *device, Scene *scene, Progress &progress); - void device_update_slot(Device *device, Scene *scene, int flat_slot, Progress *progress); + void device_update_slot(Device *device, Scene *scene, int slot, Progress *progress); void device_free(Device *device); void device_load_builtin(Device *device, Scene *scene, Progress &progress); @@ -116,78 +181,49 @@ class ImageManager { void set_osl_texture_system(void *texture_system); bool set_animation_frame_update(int frame); - device_memory *image_memory(int flat_slot); - void collect_statistics(RenderStats *stats); bool need_update; - /* NOTE: Here pixels_size is a size of storage, which equals to - * width * height * depth. - * Use this to avoid some nasty memory corruptions. 
- */ - function<void(const string &filename, void *data, ImageMetaData &metadata)> - builtin_image_info_cb; - function<bool(const string &filename, - void *data, - int tile, - unsigned char *pixels, - const size_t pixels_size, - const bool associate_alpha, - const bool free_cache)> - builtin_image_pixels_cb; - function<bool(const string &filename, - void *data, - int tile, - float *pixels, - const size_t pixels_size, - const bool associate_alpha, - const bool free_cache)> - builtin_image_float_pixels_cb; - struct Image { - string filename; - void *builtin_data; + ImageParams params; ImageMetaData metadata; + ImageLoader *loader; - ustring colorspace; - ImageAlphaType alpha_type; - bool need_load; - bool animated; float frame; - InterpolationType interpolation; - ExtensionType extension; + bool need_metadata; + bool need_load; + bool builtin; string mem_name; - device_memory *mem; + device_texture *mem; int users; + thread_mutex mutex; }; private: - int tex_num_images[IMAGE_DATA_NUM_TYPES]; - int max_num_images; bool has_half_images; thread_mutex device_mutex; int animation_frame; - vector<Image *> images[IMAGE_DATA_NUM_TYPES]; + vector<Image *> images; void *osl_texture_system; - bool file_load_image_generic(Image *img, unique_ptr<ImageInput> *in); + int add_image_slot(ImageLoader *loader, const ImageParams &params, const bool builtin); + void add_image_user(int slot); + void remove_image_user(int slot); + + void load_image_metadata(Image *img); - template<TypeDesc::BASETYPE FileFormat, typename StorageType, typename DeviceType> - bool file_load_image(Image *img, - ImageDataType type, - int texture_limit, - device_vector<DeviceType> &tex_img); + template<TypeDesc::BASETYPE FileFormat, typename StorageType> + bool file_load_image(Image *img, int texture_limit); - void metadata_detect_colorspace(ImageMetaData &metadata, const char *file_format); + void device_load_image(Device *device, Scene *scene, int slot, Progress *progress); + void device_free_image(Device *device, 
int slot); - void device_load_image( - Device *device, Scene *scene, ImageDataType type, int slot, Progress *progress); - void device_free_image(Device *device, ImageDataType type, int slot); + friend class ImageHandle; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/image_oiio.cpp b/intern/cycles/render/image_oiio.cpp new file mode 100644 index 00000000000..c4f95c6b4bc --- /dev/null +++ b/intern/cycles/render/image_oiio.cpp @@ -0,0 +1,236 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/image_oiio.h" + +#include "util/util_image.h" +#include "util/util_logging.h" +#include "util/util_path.h" + +CCL_NAMESPACE_BEGIN + +OIIOImageLoader::OIIOImageLoader(const string &filepath) : filepath(filepath) +{ +} + +OIIOImageLoader::~OIIOImageLoader() +{ +} + +bool OIIOImageLoader::load_metadata(ImageMetaData &metadata) +{ + /* Perform preliminary checks, with meaningful logging. 
*/ + if (!path_exists(filepath.string())) { + VLOG(1) << "File '" << filepath.string() << "' does not exist."; + return false; + } + if (path_is_directory(filepath.string())) { + VLOG(1) << "File '" << filepath.string() << "' is a directory, can't use as image."; + return false; + } + + unique_ptr<ImageInput> in(ImageInput::create(filepath.string())); + + if (!in) { + return false; + } + + ImageSpec spec; + if (!in->open(filepath.string(), spec)) { + return false; + } + + metadata.width = spec.width; + metadata.height = spec.height; + metadata.depth = spec.depth; + metadata.compress_as_srgb = false; + + /* Check the main format, and channel formats. */ + size_t channel_size = spec.format.basesize(); + + bool is_float = false; + bool is_half = false; + + if (spec.format.is_floating_point()) { + is_float = true; + } + + for (size_t channel = 0; channel < spec.channelformats.size(); channel++) { + channel_size = max(channel_size, spec.channelformats[channel].basesize()); + if (spec.channelformats[channel].is_floating_point()) { + is_float = true; + } + } + + /* check if it's half float */ + if (spec.format == TypeDesc::HALF) { + is_half = true; + } + + /* set type and channels */ + metadata.channels = spec.nchannels; + + if (is_half) { + metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF; + } + else if (is_float) { + metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT; + } + else if (spec.format == TypeDesc::USHORT) { + metadata.type = (metadata.channels > 1) ? IMAGE_DATA_TYPE_USHORT4 : IMAGE_DATA_TYPE_USHORT; + } + else { + metadata.type = (metadata.channels > 1) ? 
IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE; + } + + metadata.colorspace_file_format = in->format_name(); + + in->close(); + + return true; +} + +template<TypeDesc::BASETYPE FileFormat, typename StorageType> +static void oiio_load_pixels(const ImageMetaData &metadata, + const unique_ptr<ImageInput> &in, + StorageType *pixels) +{ + const int width = metadata.width; + const int height = metadata.height; + const int depth = metadata.depth; + const int components = metadata.channels; + + /* Read pixels through OpenImageIO. */ + StorageType *readpixels = pixels; + vector<StorageType> tmppixels; + if (components > 4) { + tmppixels.resize(((size_t)width) * height * components); + readpixels = &tmppixels[0]; + } + + if (depth <= 1) { + size_t scanlinesize = ((size_t)width) * components * sizeof(StorageType); + in->read_image(FileFormat, + (uchar *)readpixels + (height - 1) * scanlinesize, + AutoStride, + -scanlinesize, + AutoStride); + } + else { + in->read_image(FileFormat, (uchar *)readpixels); + } + + if (components > 4) { + size_t dimensions = ((size_t)width) * height; + for (size_t i = dimensions - 1, pixel = 0; pixel < dimensions; pixel++, i--) { + pixels[i * 4 + 3] = tmppixels[i * components + 3]; + pixels[i * 4 + 2] = tmppixels[i * components + 2]; + pixels[i * 4 + 1] = tmppixels[i * components + 1]; + pixels[i * 4 + 0] = tmppixels[i * components + 0]; + } + tmppixels.clear(); + } + + /* CMYK to RGBA. 
*/ + const bool cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4; + if (cmyk) { + const StorageType one = util_image_cast_from_float<StorageType>(1.0f); + + const size_t num_pixels = ((size_t)width) * height * depth; + for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) { + float c = util_image_cast_to_float(pixels[i * 4 + 0]); + float m = util_image_cast_to_float(pixels[i * 4 + 1]); + float y = util_image_cast_to_float(pixels[i * 4 + 2]); + float k = util_image_cast_to_float(pixels[i * 4 + 3]); + pixels[i * 4 + 0] = util_image_cast_from_float<StorageType>((1.0f - c) * (1.0f - k)); + pixels[i * 4 + 1] = util_image_cast_from_float<StorageType>((1.0f - m) * (1.0f - k)); + pixels[i * 4 + 2] = util_image_cast_from_float<StorageType>((1.0f - y) * (1.0f - k)); + pixels[i * 4 + 3] = one; + } + } +} + +bool OIIOImageLoader::load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t, + const bool associate_alpha) +{ + unique_ptr<ImageInput> in = NULL; + + /* NOTE: Error logging is done in meta data acquisition. 
*/ + if (!path_exists(filepath.string()) || path_is_directory(filepath.string())) { + return false; + } + + /* load image from file through OIIO */ + in = unique_ptr<ImageInput>(ImageInput::create(filepath.string())); + if (!in) { + return false; + } + + ImageSpec spec = ImageSpec(); + ImageSpec config = ImageSpec(); + + if (!associate_alpha) { + config.attribute("oiio:UnassociatedAlpha", 1); + } + + if (!in->open(filepath.string(), spec, config)) { + return false; + } + + switch (metadata.type) { + case IMAGE_DATA_TYPE_BYTE: + case IMAGE_DATA_TYPE_BYTE4: + oiio_load_pixels<TypeDesc::UINT8, uchar>(metadata, in, (uchar *)pixels); + break; + case IMAGE_DATA_TYPE_USHORT: + case IMAGE_DATA_TYPE_USHORT4: + oiio_load_pixels<TypeDesc::USHORT, uint16_t>(metadata, in, (uint16_t *)pixels); + break; + case IMAGE_DATA_TYPE_HALF: + case IMAGE_DATA_TYPE_HALF4: + oiio_load_pixels<TypeDesc::HALF, half>(metadata, in, (half *)pixels); + break; + case IMAGE_DATA_TYPE_FLOAT: + case IMAGE_DATA_TYPE_FLOAT4: + oiio_load_pixels<TypeDesc::FLOAT, float>(metadata, in, (float *)pixels); + break; + case IMAGE_DATA_NUM_TYPES: + break; + } + + in->close(); + return true; +} + +string OIIOImageLoader::name() const +{ + return path_filename(filepath.string()); +} + +ustring OIIOImageLoader::osl_filepath() const +{ + return filepath; +} + +bool OIIOImageLoader::equals(const ImageLoader &other) const +{ + const OIIOImageLoader &other_loader = (const OIIOImageLoader &)other; + return filepath == other_loader.filepath; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/image_oiio.h b/intern/cycles/render/image_oiio.h new file mode 100644 index 00000000000..a234b968557 --- /dev/null +++ b/intern/cycles/render/image_oiio.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __IMAGE_OIIO__ +#define __IMAGE_OIIO__ + +#include "render/image.h" + +CCL_NAMESPACE_BEGIN + +class OIIOImageLoader : public ImageLoader { + public: + OIIOImageLoader(const string &filepath); + ~OIIOImageLoader(); + + bool load_metadata(ImageMetaData &metadata) override; + + bool load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) override; + + string name() const override; + + ustring osl_filepath() const override; + + bool equals(const ImageLoader &other) const override; + + protected: + ustring filepath; +}; + +CCL_NAMESPACE_END + +#endif /* __IMAGE_OIIO__ */ diff --git a/intern/cycles/render/image_vdb.cpp b/intern/cycles/render/image_vdb.cpp new file mode 100644 index 00000000000..500131c2d84 --- /dev/null +++ b/intern/cycles/render/image_vdb.cpp @@ -0,0 +1,188 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "render/image_vdb.h" + +#ifdef WITH_OPENVDB +# include <openvdb/openvdb.h> +# include <openvdb/tools/Dense.h> +#endif + +CCL_NAMESPACE_BEGIN + +VDBImageLoader::VDBImageLoader(const string &grid_name) : grid_name(grid_name) +{ +} + +VDBImageLoader::~VDBImageLoader() +{ +} + +bool VDBImageLoader::load_metadata(ImageMetaData &metadata) +{ +#ifdef WITH_OPENVDB + if (!grid) { + return false; + } + + bbox = grid->evalActiveVoxelBoundingBox(); + if (bbox.empty()) { + return false; + } + + /* Set dimensions. */ + openvdb::Coord dim = bbox.dim(); + openvdb::Coord min = bbox.min(); + metadata.width = dim.x(); + metadata.height = dim.y(); + metadata.depth = dim.z(); + + /* Set data type. */ + if (grid->isType<openvdb::FloatGrid>()) { + metadata.channels = 1; + } + else if (grid->isType<openvdb::Vec3fGrid>()) { + metadata.channels = 3; + } + else if (grid->isType<openvdb::BoolGrid>()) { + metadata.channels = 1; + } + else if (grid->isType<openvdb::DoubleGrid>()) { + metadata.channels = 1; + } + else if (grid->isType<openvdb::Int32Grid>()) { + metadata.channels = 1; + } + else if (grid->isType<openvdb::Int64Grid>()) { + metadata.channels = 1; + } + else if (grid->isType<openvdb::Vec3IGrid>()) { + metadata.channels = 3; + } + else if (grid->isType<openvdb::Vec3dGrid>()) { + metadata.channels = 3; + } + else if (grid->isType<openvdb::MaskGrid>()) { + metadata.channels = 1; + } + else { + return false; + } + + if (metadata.channels == 1) { + metadata.type = IMAGE_DATA_TYPE_FLOAT; + } + else { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + } + + /* Set transform from object space to voxel index. 
*/ + openvdb::math::Mat4f grid_matrix = grid->transform().baseMap()->getAffineMap()->getMat4(); + Transform index_to_object; + for (int col = 0; col < 4; col++) { + for (int row = 0; row < 3; row++) { + index_to_object[row][col] = (float)grid_matrix[col][row]; + } + } + + Transform texture_to_index = transform_translate(min.x(), min.y(), min.z()) * + transform_scale(dim.x(), dim.y(), dim.z()); + + metadata.transform_3d = transform_inverse(index_to_object * texture_to_index); + metadata.use_transform_3d = true; + + return true; +#else + (void)metadata; + return false; +#endif +} + +bool VDBImageLoader::load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) +{ +#ifdef WITH_OPENVDB + if (grid->isType<openvdb::FloatGrid>()) { + openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::FloatGrid>(grid), dense); + } + else if (grid->isType<openvdb::Vec3fGrid>()) { + openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense( + bbox, (openvdb::Vec3f *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3fGrid>(grid), dense); + } + else if (grid->isType<openvdb::BoolGrid>()) { + openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::BoolGrid>(grid), dense); + } + else if (grid->isType<openvdb::DoubleGrid>()) { + openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::DoubleGrid>(grid), dense); + } + else if (grid->isType<openvdb::Int32Grid>()) { + openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Int32Grid>(grid), dense); + } + else if (grid->isType<openvdb::Int64Grid>()) { + openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> 
dense(bbox, (float *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Int64Grid>(grid), dense); + } + else if (grid->isType<openvdb::Vec3IGrid>()) { + openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense( + bbox, (openvdb::Vec3f *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3IGrid>(grid), dense); + } + else if (grid->isType<openvdb::Vec3dGrid>()) { + openvdb::tools::Dense<openvdb::Vec3f, openvdb::tools::LayoutXYZ> dense( + bbox, (openvdb::Vec3f *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::Vec3dGrid>(grid), dense); + } + else if (grid->isType<openvdb::MaskGrid>()) { + openvdb::tools::Dense<float, openvdb::tools::LayoutXYZ> dense(bbox, (float *)pixels); + openvdb::tools::copyToDense(*openvdb::gridConstPtrCast<openvdb::MaskGrid>(grid), dense); + } + + return true; +#else + (void)pixels; + return false; +#endif +} + +string VDBImageLoader::name() const +{ + return grid_name; +} + +bool VDBImageLoader::equals(const ImageLoader &other) const +{ +#ifdef WITH_OPENVDB + const VDBImageLoader &other_loader = (const VDBImageLoader &)other; + return grid == other_loader.grid; +#else + (void)other; + return true; +#endif +} + +void VDBImageLoader::cleanup() +{ +#ifdef WITH_OPENVDB + /* Free OpenVDB grid memory as soon as we can. */ + grid.reset(); +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/image_vdb.h b/intern/cycles/render/image_vdb.h new file mode 100644 index 00000000000..7dec63b11e6 --- /dev/null +++ b/intern/cycles/render/image_vdb.h @@ -0,0 +1,56 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __IMAGE_VDB__ +#define __IMAGE_VDB__ + +#ifdef WITH_OPENVDB +# include <openvdb/openvdb.h> +#endif + +#include "render/image.h" + +CCL_NAMESPACE_BEGIN + +class VDBImageLoader : public ImageLoader { + public: + VDBImageLoader(const string &grid_name); + ~VDBImageLoader(); + + virtual bool load_metadata(ImageMetaData &metadata) override; + + virtual bool load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) override; + + virtual string name() const override; + + virtual bool equals(const ImageLoader &other) const override; + + virtual void cleanup() override; + + protected: + string grid_name; +#ifdef WITH_OPENVDB + openvdb::GridBase::ConstPtr grid; + openvdb::CoordBBox bbox; +#endif +}; + +CCL_NAMESPACE_END + +#endif /* __IMAGE_VDB__ */ diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 530c32106b7..2f9d088899e 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -14,17 +14,21 @@ * limitations under the License. 
*/ +#include "render/integrator.h" #include "device/device.h" #include "render/background.h" -#include "render/integrator.h" #include "render/film.h" +#include "render/jitter.h" #include "render/light.h" #include "render/scene.h" #include "render/shader.h" #include "render/sobol.h" +#include "kernel/kernel_types.h" + #include "util/util_foreach.h" #include "util/util_hash.h" +#include "util/util_logging.h" CCL_NAMESPACE_BEGIN @@ -46,7 +50,7 @@ NODE_DEFINE(Integrator) SOCKET_INT(ao_bounces, "AO Bounces", 0); SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024); - SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f); + SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f); SOCKET_BOOLEAN(caustics_reflective, "Reflective Caustics", true); SOCKET_BOOLEAN(caustics_refractive, "Refractive Caustics", true); @@ -66,6 +70,9 @@ NODE_DEFINE(Integrator) SOCKET_INT(volume_samples, "Volume Samples", 1); SOCKET_INT(start_sample, "Start Sample", 0); + SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f); + SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0); + SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true); SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true); SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f); @@ -78,6 +85,7 @@ NODE_DEFINE(Integrator) static NodeEnum sampling_pattern_enum; sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ); + sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ); SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); return type; @@ -135,7 +143,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene } kintegrator->volume_max_steps = volume_max_steps; - kintegrator->volume_step_size = volume_step_size; + kintegrator->volume_step_rate = volume_step_rate; kintegrator->caustics_reflective = 
caustics_reflective; kintegrator->caustics_refractive = caustics_refractive; @@ -174,6 +182,22 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->sampling_pattern = sampling_pattern; kintegrator->aa_samples = aa_samples; + if (aa_samples > 0 && adaptive_min_samples == 0) { + kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples)); + VLOG(1) << "Cycles adaptive sampling: automatic min samples = " + << kintegrator->adaptive_min_samples; + } + else { + kintegrator->adaptive_min_samples = max(4, adaptive_min_samples); + } + if (aa_samples > 0 && adaptive_threshold == 0.0f) { + kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples); + VLOG(1) << "Cycles adaptive sampling: automatic threshold = " + << kintegrator->adaptive_threshold; + } + else { + kintegrator->adaptive_threshold = adaptive_threshold; + } if (light_sampling_threshold > 0.0f) { kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold; @@ -203,18 +227,34 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM; dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS); - uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions); + if (sampling_pattern == SAMPLING_PATTERN_SOBOL) { + uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions); - sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); + sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); - dscene->sobol_directions.copy_to_device(); + dscene->sample_pattern_lut.copy_to_device(); + } + else { + constexpr int sequence_size = NUM_PMJ_SAMPLES; + constexpr int num_sequences = NUM_PMJ_PATTERNS; + float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences * + 2); + TaskPool pool; + for (int j = 0; j < num_sequences; ++j) { + float2 *sequence = directions + j * 
sequence_size; + pool.push( + function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j)); + } + pool.wait_work(); + dscene->sample_pattern_lut.copy_to_device(); + } need_update = false; } void Integrator::device_free(Device *, DeviceScene *dscene) { - dscene->sobol_directions.free(); + dscene->sample_pattern_lut.free(); } bool Integrator::modified(const Integrator &integrator) diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 32d84c27072..9804caebe6e 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -45,7 +45,7 @@ class Integrator : public Node { int ao_bounces; int volume_max_steps; - float volume_step_size; + float volume_step_rate; bool caustics_reflective; bool caustics_refractive; @@ -75,6 +75,9 @@ class Integrator : public Node { bool sample_all_lights_indirect; float light_sampling_threshold; + int adaptive_min_samples; + float adaptive_threshold; + enum Method { BRANCHED_PATH = 0, PATH = 1, diff --git a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp new file mode 100644 index 00000000000..fc47b0e8f0a --- /dev/null +++ b/intern/cycles/render/jitter.cpp @@ -0,0 +1,287 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file is based on "Progressive Multi-Jittered Sample Sequences" + * by Per Christensen, Andrew Kensler and Charlie Kilpatrick. 
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf + * + * Performance can be improved in the future by implementing the new + * algorithm from Matt Pharr in http://jcgt.org/published/0008/01/04/ + * "Efficient Generation of Points that Satisfy Two-Dimensional Elementary Intervals" + */ + +#include "render/jitter.h" + +#include <math.h> +#include <vector> + +CCL_NAMESPACE_BEGIN + +static uint cmj_hash(uint i, uint p) +{ + i ^= p; + i ^= i >> 17; + i ^= i >> 10; + i *= 0xb36534e5; + i ^= i >> 12; + i ^= i >> 21; + i *= 0x93fc4795; + i ^= 0xdf6e307f; + i ^= i >> 17; + i *= 1 | p >> 18; + + return i; +} + +static float cmj_randfloat(uint i, uint p) +{ + return cmj_hash(i, p) * (1.0f / 4294967808.0f); +} + +class PMJ_Generator { + public: + static void generate_2D(float2 points[], int size, int rng_seed_in) + { + PMJ_Generator g(rng_seed_in); + points[0].x = g.rnd(); + points[0].y = g.rnd(); + int N = 1; + while (N < size) { + g.extend_sequence_even(points, N); + g.extend_sequence_odd(points, 2 * N); + N = 4 * N; + } + } + + protected: + PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in) + { + } + + float rnd() + { + return cmj_randfloat(++rnd_index, rnd_seed); + } + + virtual void mark_occupied_strata(float2 points[], int N) + { + int NN = 2 * N; + for (int s = 0; s < NN; ++s) { + occupied1Dx[s] = occupied1Dy[s] = false; + } + for (int s = 0; s < N; ++s) { + int xstratum = (int)(NN * points[s].x); + int ystratum = (int)(NN * points[s].y); + occupied1Dx[xstratum] = true; + occupied1Dy[ystratum] = true; + } + } + + virtual void generate_sample_point( + float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) + { + int NN = 2 * N; + float2 pt; + int xstratum, ystratum; + do { + pt.x = (i + 0.5f * (xhalf + rnd())) / n; + xstratum = (int)(NN * pt.x); + } while (occupied1Dx[xstratum]); + do { + pt.y = (j + 0.5f * (yhalf + rnd())) / n; + ystratum = (int)(NN * pt.y); + } while 
(occupied1Dy[ystratum]); + occupied1Dx[xstratum] = true; + occupied1Dy[ystratum] = true; + points[num_samples] = pt; + ++num_samples; + } + + void extend_sequence_even(float2 points[], int N) + { + int n = (int)sqrtf(N); + occupied1Dx.resize(2 * N); + occupied1Dy.resize(2 * N); + mark_occupied_strata(points, N); + for (int s = 0; s < N; ++s) { + float2 oldpt = points[s]; + float i = floorf(n * oldpt.x); + float j = floorf(n * oldpt.y); + float xhalf = floorf(2.0f * (n * oldpt.x - i)); + float yhalf = floorf(2.0f * (n * oldpt.y - j)); + xhalf = 1.0f - xhalf; + yhalf = 1.0f - yhalf; + generate_sample_point(points, i, j, xhalf, yhalf, n, N); + } + } + + void extend_sequence_odd(float2 points[], int N) + { + int n = (int)sqrtf(N / 2); + occupied1Dx.resize(2 * N); + occupied1Dy.resize(2 * N); + mark_occupied_strata(points, N); + std::vector<float> xhalves(N / 2); + std::vector<float> yhalves(N / 2); + for (int s = 0; s < N / 2; ++s) { + float2 oldpt = points[s]; + float i = floorf(n * oldpt.x); + float j = floorf(n * oldpt.y); + float xhalf = floorf(2.0f * (n * oldpt.x - i)); + float yhalf = floorf(2.0f * (n * oldpt.y - j)); + if (rnd() > 0.5f) { + xhalf = 1.0f - xhalf; + } + else { + yhalf = 1.0f - yhalf; + } + xhalves[s] = xhalf; + yhalves[s] = yhalf; + generate_sample_point(points, i, j, xhalf, yhalf, n, N); + } + for (int s = 0; s < N / 2; ++s) { + float2 oldpt = points[s]; + float i = floorf(n * oldpt.x); + float j = floorf(n * oldpt.y); + float xhalf = 1.0f - xhalves[s]; + float yhalf = 1.0f - yhalves[s]; + generate_sample_point(points, i, j, xhalf, yhalf, n, N); + } + } + + std::vector<bool> occupied1Dx, occupied1Dy; + int num_samples; + int rnd_index, rnd_seed; +}; + +class PMJ02_Generator : public PMJ_Generator { + protected: + void generate_sample_point( + float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override + { + int NN = 2 * N; + float2 pt; + do { + pt.x = (i + 0.5f * (xhalf + rnd())) / n; + pt.y = (j + 0.5f * (yhalf + rnd())) 
/ n; + } while (is_occupied(pt, NN)); + mark_occupied_strata1(pt, NN); + points[num_samples] = pt; + ++num_samples; + } + + void mark_occupied_strata(float2 points[], int N) override + { + int NN = 2 * N; + int num_shapes = (int)log2f(NN) + 1; + occupiedStrata.resize(num_shapes); + for (int shape = 0; shape < num_shapes; ++shape) { + occupiedStrata[shape].resize(NN); + for (int n = 0; n < NN; ++n) { + occupiedStrata[shape][n] = false; + } + } + for (int s = 0; s < N; ++s) { + mark_occupied_strata1(points[s], NN); + } + } + + void mark_occupied_strata1(float2 pt, int NN) + { + int shape = 0; + int xdivs = NN; + int ydivs = 1; + do { + int xstratum = (int)(xdivs * pt.x); + int ystratum = (int)(ydivs * pt.y); + size_t index = ystratum * xdivs + xstratum; + assert(index < NN); + occupiedStrata[shape][index] = true; + shape = shape + 1; + xdivs = xdivs / 2; + ydivs = ydivs * 2; + } while (xdivs > 0); + } + + bool is_occupied(float2 pt, int NN) + { + int shape = 0; + int xdivs = NN; + int ydivs = 1; + do { + int xstratum = (int)(xdivs * pt.x); + int ystratum = (int)(ydivs * pt.y); + size_t index = ystratum * xdivs + xstratum; + assert(index < NN); + if (occupiedStrata[shape][index]) { + return true; + } + shape = shape + 1; + xdivs = xdivs / 2; + ydivs = ydivs * 2; + } while (xdivs > 0); + return false; + } + + private: + std::vector<std::vector<bool>> occupiedStrata; +}; + +static void shuffle(float2 points[], int size, int rng_seed) +{ + /* Offset samples by 1.0 for faster scrambling in kernel_random.h */ + for (int i = 0; i < size; ++i) { + points[i].x += 1.0f; + points[i].y += 1.0f; + } + + if (rng_seed == 0) { + return; + } + + constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15}; + constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13}; + + int rng_index = 0; + for (int yy = 0; yy < size / 16; ++yy) { + for (int xx = 0; xx < 8; ++xx) { + int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx); + float2 tmp = points[odd[other] + yy * 16]; + 
points[odd[other] + yy * 16] = points[odd[xx] + yy * 16]; + points[odd[xx] + yy * 16] = tmp; + } + for (int xx = 0; xx < 8; ++xx) { + int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx); + float2 tmp = points[even[other] + yy * 16]; + points[even[other] + yy * 16] = points[even[xx] + yy * 16]; + points[even[xx] + yy * 16] = tmp; + } + } +} + +void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed) +{ + PMJ_Generator::generate_2D(points, size, rng_seed); + shuffle(points, size, rng_seed); +} + +void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed) +{ + PMJ02_Generator::generate_2D(points, size, rng_seed); + shuffle(points, size, rng_seed); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/jitter.h b/intern/cycles/render/jitter.h new file mode 100644 index 00000000000..ed34c7a4f4d --- /dev/null +++ b/intern/cycles/render/jitter.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __JITTER_H__ +#define __JITTER_H__ + +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed); +void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed); + +CCL_NAMESPACE_END + +#endif /* __JITTER_H__ */ diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 664217d6f26..9adf8e5341a 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "render/background.h" +#include "render/light.h" #include "device/device.h" -#include "render/integrator.h" +#include "render/background.h" #include "render/film.h" #include "render/graph.h" -#include "render/light.h" +#include "render/integrator.h" #include "render/mesh.h" #include "render/nodes.h" #include "render/object.h" @@ -28,9 +28,9 @@ #include "util/util_foreach.h" #include "util/util_hash.h" +#include "util/util_logging.h" #include "util/util_path.h" #include "util/util_progress.h" -#include "util/util_logging.h" CCL_NAMESPACE_BEGIN @@ -232,7 +232,10 @@ void LightManager::disable_ineffective_light(Scene *scene) bool LightManager::object_usable_as_light(Object *object) { - Mesh *mesh = object->mesh; + Geometry *geom = object->geometry; + if (geom->type != Geometry::MESH) { + return false; + } /* Skip objects with NaNs */ if (!object->bounds.valid()) { return false; @@ -243,10 +246,10 @@ bool LightManager::object_usable_as_light(Object *object) } /* Skip if we have no emission shaders. */ /* TODO(sergey): Ideally we want to avoid such duplicated loop, since it'll - * iterate all mesh shaders twice (when counting and when calculating + * iterate all geometry shaders twice (when counting and when calculating * triangle area. 
*/ - foreach (const Shader *shader, mesh->used_shaders) { + foreach (const Shader *shader, geom->used_shaders) { if (shader->use_mis && shader->has_surface_emission) { return true; } @@ -285,8 +288,9 @@ void LightManager::device_update_distribution(Device *, if (!object_usable_as_light(object)) { continue; } + /* Count triangles. */ - Mesh *mesh = object->mesh; + Mesh *mesh = static_cast<Mesh *>(object->geometry); size_t mesh_num_triangles = mesh->num_triangles(); for (size_t i = 0; i < mesh_num_triangles; i++) { int shader_index = mesh->shader[i]; @@ -320,7 +324,7 @@ void LightManager::device_update_distribution(Device *, continue; } /* Sum area. */ - Mesh *mesh = object->mesh; + Mesh *mesh = static_cast<Mesh *>(object->geometry); bool transform_applied = mesh->transform_applied; Transform tfm = object->tfm; int object_id = j; @@ -352,7 +356,7 @@ void LightManager::device_update_distribution(Device *, if (shader->use_mis && shader->has_surface_emission) { distribution[offset].totarea = totarea; - distribution[offset].prim = i + mesh->tri_offset; + distribution[offset].prim = i + mesh->prim_offset; distribution[offset].mesh_light.shader_flag = shader_flag; distribution[offset].mesh_light.object_id = object_id; offset++; @@ -573,8 +577,8 @@ void LightManager::device_update_background(Device *device, if (node->type == EnvironmentTextureNode::node_type) { EnvironmentTextureNode *env = (EnvironmentTextureNode *)node; ImageMetaData metadata; - if (env->image_manager && !env->slots.empty() && - env->image_manager->get_image_metadata(env->slots[0], metadata)) { + if (!env->handle.empty()) { + ImageMetaData metadata = env->handle.metadata(); res.x = max(res.x, metadata.width); res.y = max(res.y, metadata.height); } diff --git a/intern/cycles/render/merge.cpp b/intern/cycles/render/merge.cpp index cac07e59fe3..3ea3952b96c 100644 --- a/intern/cycles/render/merge.cpp +++ b/intern/cycles/render/merge.cpp @@ -22,8 +22,8 @@ #include "util/util_time.h" #include 
"util/util_unique_ptr.h" -#include <OpenImageIO/imageio.h> #include <OpenImageIO/filesystem.h> +#include <OpenImageIO/imageio.h> OIIO_NAMESPACE_USING diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index d9e6d998ebd..c262d770331 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -17,32 +17,22 @@ #include "bvh/bvh.h" #include "bvh/bvh_build.h" -#include "render/camera.h" -#include "render/curves.h" #include "device/device.h" + #include "render/graph.h" -#include "render/shader.h" -#include "render/light.h" +#include "render/hair.h" #include "render/mesh.h" -#include "render/nodes.h" #include "render/object.h" #include "render/scene.h" -#include "render/stats.h" - -#include "kernel/osl/osl_globals.h" -#include "subd/subd_split.h" #include "subd/subd_patch_table.h" +#include "subd/subd_split.h" #include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_progress.h" #include "util/util_set.h" -#ifdef WITH_EMBREE -# include "bvh/bvh_embree.h" -#endif - CCL_NAMESPACE_BEGIN /* Triangle */ @@ -120,263 +110,6 @@ bool Mesh::Triangle::valid(const float3 *verts) const return isfinite3_safe(verts[v[0]]) && isfinite3_safe(verts[v[1]]) && isfinite3_safe(verts[v[2]]); } -/* Curve */ - -void Mesh::Curve::bounds_grow(const int k, - const float3 *curve_keys, - const float *curve_radius, - BoundBox &bounds) const -{ - float3 P[4]; - - P[0] = curve_keys[max(first_key + k - 1, first_key)]; - P[1] = curve_keys[first_key + k]; - P[2] = curve_keys[first_key + k + 1]; - P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)]; - - float3 lower; - float3 upper; - - curvebounds(&lower.x, &upper.x, P, 0); - curvebounds(&lower.y, &upper.y, P, 1); - curvebounds(&lower.z, &upper.z, P, 2); - - float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]); - - bounds.grow(lower, mr); - bounds.grow(upper, mr); -} - -void Mesh::Curve::bounds_grow(const int k, - const float3 *curve_keys, - const 
float *curve_radius, - const Transform &aligned_space, - BoundBox &bounds) const -{ - float3 P[4]; - - P[0] = curve_keys[max(first_key + k - 1, first_key)]; - P[1] = curve_keys[first_key + k]; - P[2] = curve_keys[first_key + k + 1]; - P[3] = curve_keys[min(first_key + k + 2, first_key + num_keys - 1)]; - - P[0] = transform_point(&aligned_space, P[0]); - P[1] = transform_point(&aligned_space, P[1]); - P[2] = transform_point(&aligned_space, P[2]); - P[3] = transform_point(&aligned_space, P[3]); - - float3 lower; - float3 upper; - - curvebounds(&lower.x, &upper.x, P, 0); - curvebounds(&lower.y, &upper.y, P, 1); - curvebounds(&lower.z, &upper.z, P, 2); - - float mr = max(curve_radius[first_key + k], curve_radius[first_key + k + 1]); - - bounds.grow(lower, mr); - bounds.grow(upper, mr); -} - -void Mesh::Curve::bounds_grow(float4 keys[4], BoundBox &bounds) const -{ - float3 P[4] = { - float4_to_float3(keys[0]), - float4_to_float3(keys[1]), - float4_to_float3(keys[2]), - float4_to_float3(keys[3]), - }; - - float3 lower; - float3 upper; - - curvebounds(&lower.x, &upper.x, P, 0); - curvebounds(&lower.y, &upper.y, P, 1); - curvebounds(&lower.z, &upper.z, P, 2); - - float mr = max(keys[1].w, keys[2].w); - - bounds.grow(lower, mr); - bounds.grow(upper, mr); -} - -void Mesh::Curve::motion_keys(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - float time, - size_t k0, - size_t k1, - float4 r_keys[2]) const -{ - /* Figure out which steps we need to fetch and their interpolation factor. */ - const size_t max_step = num_steps - 1; - const size_t step = min((int)(time * max_step), max_step - 1); - const float t = time * max_step - step; - /* Fetch vertex coordinates. 
*/ - float4 curr_keys[2]; - float4 next_keys[2]; - keys_for_step( - curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step, k0, k1, curr_keys); - keys_for_step( - curve_keys, curve_radius, key_steps, num_curve_keys, num_steps, step + 1, k0, k1, next_keys); - /* Interpolate between steps. */ - r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0]; - r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1]; -} - -void Mesh::Curve::cardinal_motion_keys(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - float time, - size_t k0, - size_t k1, - size_t k2, - size_t k3, - float4 r_keys[4]) const -{ - /* Figure out which steps we need to fetch and their interpolation factor. */ - const size_t max_step = num_steps - 1; - const size_t step = min((int)(time * max_step), max_step - 1); - const float t = time * max_step - step; - /* Fetch vertex coordinates. */ - float4 curr_keys[4]; - float4 next_keys[4]; - cardinal_keys_for_step(curve_keys, - curve_radius, - key_steps, - num_curve_keys, - num_steps, - step, - k0, - k1, - k2, - k3, - curr_keys); - cardinal_keys_for_step(curve_keys, - curve_radius, - key_steps, - num_curve_keys, - num_steps, - step + 1, - k0, - k1, - k2, - k3, - next_keys); - /* Interpolate between steps. */ - r_keys[0] = (1.0f - t) * curr_keys[0] + t * next_keys[0]; - r_keys[1] = (1.0f - t) * curr_keys[1] + t * next_keys[1]; - r_keys[2] = (1.0f - t) * curr_keys[2] + t * next_keys[2]; - r_keys[3] = (1.0f - t) * curr_keys[3] + t * next_keys[3]; -} - -void Mesh::Curve::keys_for_step(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - size_t step, - size_t k0, - size_t k1, - float4 r_keys[2]) const -{ - k0 = max(k0, 0); - k1 = min(k1, num_keys - 1); - const size_t center_step = ((num_steps - 1) / 2); - if (step == center_step) { - /* Center step: regular key location. 
*/ - /* TODO(sergey): Consider adding make_float4(float3, float) - * function. - */ - r_keys[0] = make_float4(curve_keys[first_key + k0].x, - curve_keys[first_key + k0].y, - curve_keys[first_key + k0].z, - curve_radius[first_key + k0]); - r_keys[1] = make_float4(curve_keys[first_key + k1].x, - curve_keys[first_key + k1].y, - curve_keys[first_key + k1].z, - curve_radius[first_key + k1]); - } - else { - /* Center step is not stored in this array. */ - if (step > center_step) { - step--; - } - const size_t offset = first_key + step * num_curve_keys; - r_keys[0] = make_float4(key_steps[offset + k0].x, - key_steps[offset + k0].y, - key_steps[offset + k0].z, - curve_radius[first_key + k0]); - r_keys[1] = make_float4(key_steps[offset + k1].x, - key_steps[offset + k1].y, - key_steps[offset + k1].z, - curve_radius[first_key + k1]); - } -} - -void Mesh::Curve::cardinal_keys_for_step(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - size_t step, - size_t k0, - size_t k1, - size_t k2, - size_t k3, - float4 r_keys[4]) const -{ - k0 = max(k0, 0); - k3 = min(k3, num_keys - 1); - const size_t center_step = ((num_steps - 1) / 2); - if (step == center_step) { - /* Center step: regular key location. */ - r_keys[0] = make_float4(curve_keys[first_key + k0].x, - curve_keys[first_key + k0].y, - curve_keys[first_key + k0].z, - curve_radius[first_key + k0]); - r_keys[1] = make_float4(curve_keys[first_key + k1].x, - curve_keys[first_key + k1].y, - curve_keys[first_key + k1].z, - curve_radius[first_key + k1]); - r_keys[2] = make_float4(curve_keys[first_key + k2].x, - curve_keys[first_key + k2].y, - curve_keys[first_key + k2].z, - curve_radius[first_key + k2]); - r_keys[3] = make_float4(curve_keys[first_key + k3].x, - curve_keys[first_key + k3].y, - curve_keys[first_key + k3].z, - curve_radius[first_key + k3]); - } - else { - /* Center step is not stored in this array. 
*/ - if (step > center_step) { - step--; - } - const size_t offset = first_key + step * num_curve_keys; - r_keys[0] = make_float4(key_steps[offset + k0].x, - key_steps[offset + k0].y, - key_steps[offset + k0].z, - curve_radius[first_key + k0]); - r_keys[1] = make_float4(key_steps[offset + k1].x, - key_steps[offset + k1].y, - key_steps[offset + k1].z, - curve_radius[first_key + k1]); - r_keys[2] = make_float4(key_steps[offset + k2].x, - key_steps[offset + k2].y, - key_steps[offset + k2].z, - curve_radius[first_key + k2]); - r_keys[3] = make_float4(key_steps[offset + k3].x, - key_steps[offset + k3].y, - key_steps[offset + k3].z, - curve_radius[first_key + k3]); - } -} - /* SubdFace */ float3 Mesh::SubdFace::normal(const Mesh *mesh) const @@ -392,60 +125,29 @@ float3 Mesh::SubdFace::normal(const Mesh *mesh) const NODE_DEFINE(Mesh) { - NodeType *type = NodeType::add("mesh", create); - - SOCKET_UINT(motion_steps, "Motion Steps", 3); - SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false); + NodeType *type = NodeType::add("mesh", create, NodeType::NONE, Geometry::node_base_type); SOCKET_INT_ARRAY(triangles, "Triangles", array<int>()); SOCKET_POINT_ARRAY(verts, "Vertices", array<float3>()); SOCKET_INT_ARRAY(shader, "Shader", array<int>()); SOCKET_BOOLEAN_ARRAY(smooth, "Smooth", array<bool>()); - SOCKET_POINT_ARRAY(curve_keys, "Curve Keys", array<float3>()); - SOCKET_FLOAT_ARRAY(curve_radius, "Curve Radius", array<float>()); - SOCKET_INT_ARRAY(curve_first_key, "Curve First Key", array<int>()); - SOCKET_INT_ARRAY(curve_shader, "Curve Shader", array<int>()); - return type; } -Mesh::Mesh() : Node(node_type) +Mesh::Mesh() : Geometry(node_type, Geometry::MESH), subd_attributes(this, ATTR_PRIM_SUBD) { - need_update = true; - need_update_rebuild = false; - transform_applied = false; - transform_negative_scaled = false; - transform_normal = transform_identity(); - bounds = BoundBox::empty; - - bvh = NULL; - - tri_offset = 0; vert_offset = 0; - curve_offset = 0; - 
curvekey_offset = 0; - patch_offset = 0; face_offset = 0; corner_offset = 0; - attr_map_offset = 0; - - prim_offset = 0; - num_subd_verts = 0; - attributes.triangle_mesh = this; - curve_attributes.curve_mesh = this; - subd_attributes.subd_mesh = this; - - geometry_flags = GEOMETRY_NONE; - - volume_isovalue = 0.001f; - has_volume = false; - has_surface_bssrdf = false; + volume_clipping = 0.001f; + volume_step_size = 0.0f; + volume_object_space = false; num_ngons = 0; @@ -457,7 +159,6 @@ Mesh::Mesh() : Node(node_type) Mesh::~Mesh() { - delete bvh; delete patch_table; delete subd_params; } @@ -493,26 +194,6 @@ void Mesh::reserve_mesh(int numverts, int numtris) attributes.resize(true); } -void Mesh::resize_curves(int numcurves, int numkeys) -{ - curve_keys.resize(numkeys); - curve_radius.resize(numkeys); - curve_first_key.resize(numcurves); - curve_shader.resize(numcurves); - - curve_attributes.resize(); -} - -void Mesh::reserve_curves(int numcurves, int numkeys) -{ - curve_keys.reserve(numkeys); - curve_radius.reserve(numkeys); - curve_first_key.reserve(numcurves); - curve_shader.reserve(numcurves); - - curve_attributes.resize(true); -} - void Mesh::resize_subd_faces(int numfaces, int num_ngons_, int numcorners) { subd_faces.resize(numfaces); @@ -533,6 +214,8 @@ void Mesh::reserve_subd_faces(int numfaces, int num_ngons_, int numcorners) void Mesh::clear(bool preserve_voxel_data) { + Geometry::clear(); + /* clear all verts and triangles */ verts.clear(); triangles.clear(); @@ -542,11 +225,6 @@ void Mesh::clear(bool preserve_voxel_data) triangle_patch.clear(); vert_patch_uv.clear(); - curve_keys.clear(); - curve_radius.clear(); - curve_first_key.clear(); - curve_shader.clear(); - subd_faces.clear(); subd_face_corners.clear(); @@ -554,27 +232,21 @@ void Mesh::clear(bool preserve_voxel_data) subd_creases.clear(); - curve_attributes.clear(); subd_attributes.clear(); attributes.clear(preserve_voxel_data); - used_shaders.clear(); - vert_to_stitching_key_map.clear(); 
vert_stitching_map.clear(); - if (!preserve_voxel_data) { - geometry_flags = GEOMETRY_NONE; - } - - transform_applied = false; - transform_negative_scaled = false; - transform_normal = transform_identity(); - delete patch_table; patch_table = NULL; } +void Mesh::clear() +{ + clear(false); +} + void Mesh::add_vertex(float3 P) { verts.push_back_reserved(P); @@ -606,18 +278,6 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_) } } -void Mesh::add_curve_key(float3 co, float radius) -{ - curve_keys.push_back_reserved(co); - curve_radius.push_back_reserved(radius); -} - -void Mesh::add_curve(int first_key, int shader) -{ - curve_first_key.push_back_reserved(first_key); - curve_shader.push_back_reserved(shader); -} - void Mesh::add_subd_face(int *corners, int num_corners, int shader_, bool smooth_) { int start_corner = subd_face_corners.size(); @@ -637,47 +297,41 @@ void Mesh::add_subd_face(int *corners, int num_corners, int shader_, bool smooth subd_faces.push_back_reserved(face); } -static void get_uv_tiles_from_attribute(Attribute *attr, int num, unordered_set<int> &tiles) +void Mesh::copy_center_to_motion_step(const int motion_step) { - if (attr == NULL) { - return; - } + Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - const float2 *uv = attr->data_float2(); - for (int i = 0; i < num; i++, uv++) { - float u = uv->x, v = uv->y; - int x = (int)u, y = (int)v; + if (attr_mP) { + Attribute *attr_mN = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + Attribute *attr_N = attributes.find(ATTR_STD_VERTEX_NORMAL); + float3 *P = &verts[0]; + float3 *N = (attr_N) ? attr_N->data_float3() : NULL; + size_t numverts = verts.size(); - if (x < 0 || y < 0 || x >= 10) { - continue; - } - - /* Be conservative in corners - precisely touching the right or upper edge of a tile - * should not load its right/upper neighbor as well. 
*/ - if (x > 0 && (u < x + 1e-6f)) { - x--; - } - if (y > 0 && (v < y + 1e-6f)) { - y--; - } - - tiles.insert(1001 + 10 * y + x); + memcpy(attr_mP->data_float3() + motion_step * numverts, P, sizeof(float3) * numverts); + if (attr_mN) + memcpy(attr_mN->data_float3() + motion_step * numverts, N, sizeof(float3) * numverts); } } void Mesh::get_uv_tiles(ustring map, unordered_set<int> &tiles) { + Attribute *attr, *subd_attr; + if (map.empty()) { - get_uv_tiles_from_attribute(attributes.find(ATTR_STD_UV), num_triangles() * 3, tiles); - get_uv_tiles_from_attribute( - subd_attributes.find(ATTR_STD_UV), subd_face_corners.size() + num_ngons, tiles); - get_uv_tiles_from_attribute(curve_attributes.find(ATTR_STD_UV), num_curves(), tiles); + attr = attributes.find(ATTR_STD_UV); + subd_attr = subd_attributes.find(ATTR_STD_UV); } else { - get_uv_tiles_from_attribute(attributes.find(map), num_triangles() * 3, tiles); - get_uv_tiles_from_attribute( - subd_attributes.find(map), subd_face_corners.size() + num_ngons, tiles); - get_uv_tiles_from_attribute(curve_attributes.find(map), num_curves(), tiles); + attr = attributes.find(map); + subd_attr = subd_attributes.find(map); + } + + if (attr) { + attr->get_uv_tiles(this, ATTR_PRIM_GEOMETRY, tiles); + } + if (subd_attr) { + subd_attr->get_uv_tiles(this, ATTR_PRIM_SUBD, tiles); } } @@ -685,15 +339,11 @@ void Mesh::compute_bounds() { BoundBox bnds = BoundBox::empty; size_t verts_size = verts.size(); - size_t curve_keys_size = curve_keys.size(); - if (verts_size + curve_keys_size > 0) { + if (verts_size > 0) { for (size_t i = 0; i < verts_size; i++) bnds.grow(verts[i]); - for (size_t i = 0; i < curve_keys_size; i++) - bnds.grow(curve_keys[i], curve_radius[i]); - Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (use_motion_blur && attr) { size_t steps_size = verts.size() * (motion_steps - 1); @@ -703,15 +353,6 @@ void Mesh::compute_bounds() bnds.grow(vert_steps[i]); } - Attribute *curve_attr = 
curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (use_motion_blur && curve_attr) { - size_t steps_size = curve_keys.size() * (motion_steps - 1); - float3 *key_steps = curve_attr->data_float3(); - - for (size_t i = 0; i < steps_size; i++) - bnds.grow(key_steps[i]); - } - if (!bnds.valid()) { bnds = BoundBox::empty; @@ -719,9 +360,6 @@ void Mesh::compute_bounds() for (size_t i = 0; i < verts_size; i++) bnds.grow_safe(verts[i]); - for (size_t i = 0; i < curve_keys_size; i++) - bnds.grow_safe(curve_keys[i], curve_radius[i]); - if (use_motion_blur && attr) { size_t steps_size = verts.size() * (motion_steps - 1); float3 *vert_steps = attr->data_float3(); @@ -729,14 +367,6 @@ void Mesh::compute_bounds() for (size_t i = 0; i < steps_size; i++) bnds.grow_safe(vert_steps[i]); } - - if (use_motion_blur && curve_attr) { - size_t steps_size = curve_keys.size() * (motion_steps - 1); - float3 *key_steps = curve_attr->data_float3(); - - for (size_t i = 0; i < steps_size; i++) - bnds.grow_safe(key_steps[i]); - } } } @@ -748,6 +378,38 @@ void Mesh::compute_bounds() bounds = bnds; } +void Mesh::apply_transform(const Transform &tfm, const bool apply_to_motion) +{ + transform_normal = transform_transposed_inverse(tfm); + + /* apply to mesh vertices */ + for (size_t i = 0; i < verts.size(); i++) + verts[i] = transform_point(&tfm, verts[i]); + + if (apply_to_motion) { + Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if (attr) { + size_t steps_size = verts.size() * (motion_steps - 1); + float3 *vert_steps = attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + vert_steps[i] = transform_point(&tfm, vert_steps[i]); + } + + Attribute *attr_N = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + + if (attr_N) { + Transform ntfm = transform_normal; + size_t steps_size = verts.size() * (motion_steps - 1); + float3 *normal_steps = attr_N->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + normal_steps[i] = 
normalize(transform_direction(&ntfm, normal_steps[i])); + } + } +} + void Mesh::add_face_normals() { /* don't compute if already there */ @@ -885,8 +547,7 @@ void Mesh::add_undisplaced() float3 *data = attr->data_float3(); /* copy verts */ - size_t size = attr->buffer_size( - this, (subdivision_type == SUBDIVISION_NONE) ? ATTR_PRIM_TRIANGLE : ATTR_PRIM_SUBD); + size_t size = attr->buffer_size(this, attrs.prim); /* Center points for ngons aren't stored in Mesh::verts but are included in size since they will * be calculated later, we subtract them from size here so we don't have an overflow while @@ -975,39 +636,6 @@ void Mesh::pack_verts(const vector<uint> &tri_prim_index, } } -void Mesh::pack_curves(Scene *scene, - float4 *curve_key_co, - float4 *curve_data, - size_t curvekey_offset) -{ - size_t curve_keys_size = curve_keys.size(); - - /* pack curve keys */ - if (curve_keys_size) { - float3 *keys_ptr = curve_keys.data(); - float *radius_ptr = curve_radius.data(); - - for (size_t i = 0; i < curve_keys_size; i++) - curve_key_co[i] = make_float4(keys_ptr[i].x, keys_ptr[i].y, keys_ptr[i].z, radius_ptr[i]); - } - - /* pack curve segments */ - size_t curve_num = num_curves(); - - for (size_t i = 0; i < curve_num; i++) { - Curve curve = get_curve(i); - int shader_id = curve_shader[i]; - Shader *shader = (shader_id < used_shaders.size()) ? 
used_shaders[shader_id] : - scene->default_surface; - shader_id = scene->shader_manager->get_shader_id(shader, false); - - curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset), - __int_as_float(curve.num_keys), - __int_as_float(shader_id), - 0.0f); - } -} - void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset) { size_t num_faces = subd_faces.size(); @@ -1054,1391 +682,4 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui } } -void Mesh::compute_bvh( - Device *device, DeviceScene *dscene, SceneParams *params, Progress *progress, int n, int total) -{ - if (progress->get_cancel()) - return; - - compute_bounds(); - - const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout, - device->get_bvh_layout_mask()); - if (need_build_bvh(bvh_layout)) { - string msg = "Updating Mesh BVH "; - if (name.empty()) - msg += string_printf("%u/%u", (uint)(n + 1), (uint)total); - else - msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total); - - Object object; - object.mesh = this; - - vector<Mesh *> meshes; - meshes.push_back(this); - vector<Object *> objects; - objects.push_back(&object); - - if (bvh && !need_update_rebuild) { - progress->set_status(msg, "Refitting BVH"); - - bvh->meshes = meshes; - bvh->objects = objects; - - bvh->refit(*progress); - } - else { - progress->set_status(msg, "Building BVH"); - - BVHParams bparams; - bparams.use_spatial_split = params->use_bvh_spatial_split; - bparams.bvh_layout = bvh_layout; - bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && - params->use_bvh_unaligned_nodes; - bparams.num_motion_triangle_steps = params->num_bvh_time_steps; - bparams.num_motion_curve_steps = params->num_bvh_time_steps; - bparams.bvh_type = params->bvh_type; - bparams.curve_flags = dscene->data.curve.curveflags; - bparams.curve_subdivisions = dscene->data.curve.subdivisions; - - delete bvh; - bvh = BVH::create(bparams, 
meshes, objects); - MEM_GUARDED_CALL(progress, bvh->build, *progress); - } - } - - need_update = false; - need_update_rebuild = false; -} - -void Mesh::tag_update(Scene *scene, bool rebuild) -{ - need_update = true; - - if (rebuild) { - need_update_rebuild = true; - scene->light_manager->need_update = true; - } - else { - foreach (Shader *shader, used_shaders) - if (shader->has_surface_emission) - scene->light_manager->need_update = true; - } - - scene->mesh_manager->need_update = true; - scene->object_manager->need_update = true; -} - -bool Mesh::has_motion_blur() const -{ - return (use_motion_blur && (attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) || - curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))); -} - -bool Mesh::has_true_displacement() const -{ - foreach (Shader *shader, used_shaders) { - if (shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) { - return true; - } - } - - return false; -} - -bool Mesh::has_voxel_attributes() const -{ - foreach (const Attribute &attr, attributes.attributes) { - if (attr.element == ATTR_ELEMENT_VOXEL) { - return true; - } - } - - return false; -} - -float Mesh::motion_time(int step) const -{ - return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f; -} - -int Mesh::motion_step(float time) const -{ - if (motion_steps > 1) { - int attr_step = 0; - - for (int step = 0; step < motion_steps; step++) { - float step_time = motion_time(step); - if (step_time == time) { - return attr_step; - } - - /* Center step is stored in a separate attribute. */ - if (step != motion_steps / 2) { - attr_step++; - } - } - } - - return -1; -} - -bool Mesh::need_build_bvh(BVHLayout layout) const -{ - return !transform_applied || has_surface_bssrdf || layout == BVH_LAYOUT_OPTIX; -} - -bool Mesh::is_instanced() const -{ - /* Currently we treat subsurface objects as instanced. 
- * - * While it might be not very optimal for ray traversal, it avoids having - * duplicated BVH in the memory, saving quite some space. - */ - return !transform_applied || has_surface_bssrdf; -} - -/* Mesh Manager */ - -MeshManager::MeshManager() -{ - need_update = true; - need_flags_update = true; -} - -MeshManager::~MeshManager() -{ -} - -void MeshManager::update_osl_attributes(Device *device, - Scene *scene, - vector<AttributeRequestSet> &mesh_attributes) -{ -#ifdef WITH_OSL - /* for OSL, a hash map is used to lookup the attribute by name. */ - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); - - og->object_name_map.clear(); - og->attribute_map.clear(); - og->object_names.clear(); - - og->attribute_map.resize(scene->objects.size() * ATTR_PRIM_TYPES); - - for (size_t i = 0; i < scene->objects.size(); i++) { - /* set object name to object index map */ - Object *object = scene->objects[i]; - og->object_name_map[object->name] = i; - og->object_names.push_back(object->name); - - /* set object attributes */ - foreach (ParamValue &attr, object->attributes) { - OSLGlobals::Attribute osl_attr; - - osl_attr.type = attr.type(); - osl_attr.desc.element = ATTR_ELEMENT_OBJECT; - osl_attr.value = attr; - osl_attr.desc.offset = 0; - osl_attr.desc.flags = 0; - - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][attr.name()] = osl_attr; - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][attr.name()] = osl_attr; - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][attr.name()] = osl_attr; - } - - /* find mesh attributes */ - size_t j; - - for (j = 0; j < scene->meshes.size(); j++) - if (scene->meshes[j] == object->mesh) - break; - - AttributeRequestSet &attributes = mesh_attributes[j]; - - /* set object attributes */ - foreach (AttributeRequest &req, attributes.requests) { - OSLGlobals::Attribute osl_attr; - - if (req.triangle_desc.element != ATTR_ELEMENT_NONE) { - osl_attr.desc = req.triangle_desc; - - if (req.triangle_type == TypeDesc::TypeFloat) - 
osl_attr.type = TypeDesc::TypeFloat; - else if (req.triangle_type == TypeDesc::TypeMatrix) - osl_attr.type = TypeDesc::TypeMatrix; - else if (req.triangle_type == TypeFloat2) - osl_attr.type = TypeFloat2; - else if (req.triangle_type == TypeRGBA) - osl_attr.type = TypeRGBA; - else - osl_attr.type = TypeDesc::TypeColor; - - if (req.std != ATTR_STD_NONE) { - /* if standard attribute, add lookup by geom: name convention */ - ustring stdname(string("geom:") + string(Attribute::standard_name(req.std))); - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][stdname] = osl_attr; - } - else if (req.name != ustring()) { - /* add lookup by mesh attribute name */ - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][req.name] = osl_attr; - } - } - - if (req.curve_desc.element != ATTR_ELEMENT_NONE) { - osl_attr.desc = req.curve_desc; - - if (req.curve_type == TypeDesc::TypeFloat) - osl_attr.type = TypeDesc::TypeFloat; - else if (req.curve_type == TypeDesc::TypeMatrix) - osl_attr.type = TypeDesc::TypeMatrix; - else if (req.curve_type == TypeFloat2) - osl_attr.type = TypeFloat2; - else if (req.curve_type == TypeRGBA) - osl_attr.type = TypeRGBA; - else - osl_attr.type = TypeDesc::TypeColor; - - if (req.std != ATTR_STD_NONE) { - /* if standard attribute, add lookup by geom: name convention */ - ustring stdname(string("geom:") + string(Attribute::standard_name(req.std))); - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][stdname] = osl_attr; - } - else if (req.name != ustring()) { - /* add lookup by mesh attribute name */ - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][req.name] = osl_attr; - } - } - - if (req.subd_desc.element != ATTR_ELEMENT_NONE) { - osl_attr.desc = req.subd_desc; - - if (req.subd_type == TypeDesc::TypeFloat) - osl_attr.type = TypeDesc::TypeFloat; - else if (req.subd_type == TypeDesc::TypeMatrix) - osl_attr.type = TypeDesc::TypeMatrix; - else if (req.subd_type == TypeFloat2) - osl_attr.type = TypeFloat2; - else if 
(req.subd_type == TypeRGBA) - osl_attr.type = TypeRGBA; - else - osl_attr.type = TypeDesc::TypeColor; - - if (req.std != ATTR_STD_NONE) { - /* if standard attribute, add lookup by geom: name convention */ - ustring stdname(string("geom:") + string(Attribute::standard_name(req.std))); - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][stdname] = osl_attr; - } - else if (req.name != ustring()) { - /* add lookup by mesh attribute name */ - og->attribute_map[i * ATTR_PRIM_TYPES + ATTR_PRIM_SUBD][req.name] = osl_attr; - } - } - } - } -#else - (void)device; - (void)scene; - (void)mesh_attributes; -#endif -} - -void MeshManager::update_svm_attributes(Device *, - DeviceScene *dscene, - Scene *scene, - vector<AttributeRequestSet> &mesh_attributes) -{ - /* for SVM, the attributes_map table is used to lookup the offset of an - * attribute, based on a unique shader attribute id. */ - - /* compute array stride */ - int attr_map_size = 0; - - for (size_t i = 0; i < scene->meshes.size(); i++) { - Mesh *mesh = scene->meshes[i]; - mesh->attr_map_offset = attr_map_size; - attr_map_size += (mesh_attributes[i].size() + 1) * ATTR_PRIM_TYPES; - } - - if (attr_map_size == 0) - return; - - /* create attribute map */ - uint4 *attr_map = dscene->attributes_map.alloc(attr_map_size); - memset(attr_map, 0, dscene->attributes_map.size() * sizeof(uint)); - - for (size_t i = 0; i < scene->meshes.size(); i++) { - Mesh *mesh = scene->meshes[i]; - AttributeRequestSet &attributes = mesh_attributes[i]; - - /* set object attributes */ - int index = mesh->attr_map_offset; - - foreach (AttributeRequest &req, attributes.requests) { - uint id; - - if (req.std == ATTR_STD_NONE) - id = scene->shader_manager->get_attribute_id(req.name); - else - id = scene->shader_manager->get_attribute_id(req.std); - - if (mesh->num_triangles()) { - attr_map[index].x = id; - attr_map[index].y = req.triangle_desc.element; - attr_map[index].z = as_uint(req.triangle_desc.offset); - - if (req.triangle_type == 
TypeDesc::TypeFloat) - attr_map[index].w = NODE_ATTR_FLOAT; - else if (req.triangle_type == TypeDesc::TypeMatrix) - attr_map[index].w = NODE_ATTR_MATRIX; - else if (req.triangle_type == TypeFloat2) - attr_map[index].w = NODE_ATTR_FLOAT2; - else if (req.triangle_type == TypeRGBA) - attr_map[index].w = NODE_ATTR_RGBA; - else - attr_map[index].w = NODE_ATTR_FLOAT3; - - attr_map[index].w |= req.triangle_desc.flags << 8; - } - - index++; - - if (mesh->num_curves()) { - attr_map[index].x = id; - attr_map[index].y = req.curve_desc.element; - attr_map[index].z = as_uint(req.curve_desc.offset); - - if (req.curve_type == TypeDesc::TypeFloat) - attr_map[index].w = NODE_ATTR_FLOAT; - else if (req.curve_type == TypeDesc::TypeMatrix) - attr_map[index].w = NODE_ATTR_MATRIX; - else if (req.curve_type == TypeFloat2) - attr_map[index].w = NODE_ATTR_FLOAT2; - else - attr_map[index].w = NODE_ATTR_FLOAT3; - - attr_map[index].w |= req.curve_desc.flags << 8; - } - - index++; - - if (mesh->subd_faces.size()) { - attr_map[index].x = id; - attr_map[index].y = req.subd_desc.element; - attr_map[index].z = as_uint(req.subd_desc.offset); - - if (req.subd_type == TypeDesc::TypeFloat) - attr_map[index].w = NODE_ATTR_FLOAT; - else if (req.subd_type == TypeDesc::TypeMatrix) - attr_map[index].w = NODE_ATTR_MATRIX; - else if (req.subd_type == TypeFloat2) - attr_map[index].w = NODE_ATTR_FLOAT2; - else if (req.triangle_type == TypeRGBA) - attr_map[index].w = NODE_ATTR_RGBA; - else - attr_map[index].w = NODE_ATTR_FLOAT3; - - attr_map[index].w |= req.subd_desc.flags << 8; - } - - index++; - } - - /* terminator */ - for (int j = 0; j < ATTR_PRIM_TYPES; j++) { - attr_map[index].x = ATTR_STD_NONE; - attr_map[index].y = 0; - attr_map[index].z = 0; - attr_map[index].w = 0; - - index++; - } - } - - /* copy to device */ - dscene->attributes_map.copy_to_device(); -} - -static void update_attribute_element_size(Mesh *mesh, - Attribute *mattr, - AttributePrimitive prim, - size_t *attr_float_size, - size_t 
*attr_float2_size, - size_t *attr_float3_size, - size_t *attr_uchar4_size) -{ - if (mattr) { - size_t size = mattr->element_size(mesh, prim); - - if (mattr->element == ATTR_ELEMENT_VOXEL) { - /* pass */ - } - else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) { - *attr_uchar4_size += size; - } - else if (mattr->type == TypeDesc::TypeFloat) { - *attr_float_size += size; - } - else if (mattr->type == TypeFloat2) { - *attr_float2_size += size; - } - else if (mattr->type == TypeDesc::TypeMatrix) { - *attr_float3_size += size * 4; - } - else { - *attr_float3_size += size; - } - } -} - -static void update_attribute_element_offset(Mesh *mesh, - device_vector<float> &attr_float, - size_t &attr_float_offset, - device_vector<float2> &attr_float2, - size_t &attr_float2_offset, - device_vector<float4> &attr_float3, - size_t &attr_float3_offset, - device_vector<uchar4> &attr_uchar4, - size_t &attr_uchar4_offset, - Attribute *mattr, - AttributePrimitive prim, - TypeDesc &type, - AttributeDescriptor &desc) -{ - if (mattr) { - /* store element and type */ - desc.element = mattr->element; - desc.flags = mattr->flags; - type = mattr->type; - - /* store attribute data in arrays */ - size_t size = mattr->element_size(mesh, prim); - - AttributeElement &element = desc.element; - int &offset = desc.offset; - - if (mattr->element == ATTR_ELEMENT_VOXEL) { - /* store slot in offset value */ - VoxelAttribute *voxel_data = mattr->data_voxel(); - offset = voxel_data->slot; - } - else if (mattr->element == ATTR_ELEMENT_CORNER_BYTE) { - uchar4 *data = mattr->data_uchar4(); - offset = attr_uchar4_offset; - - assert(attr_uchar4.size() >= offset + size); - for (size_t k = 0; k < size; k++) { - attr_uchar4[offset + k] = data[k]; - } - attr_uchar4_offset += size; - } - else if (mattr->type == TypeDesc::TypeFloat) { - float *data = mattr->data_float(); - offset = attr_float_offset; - - assert(attr_float.size() >= offset + size); - for (size_t k = 0; k < size; k++) { - attr_float[offset + k] = 
data[k]; - } - attr_float_offset += size; - } - else if (mattr->type == TypeFloat2) { - float2 *data = mattr->data_float2(); - offset = attr_float2_offset; - - assert(attr_float2.size() >= offset + size); - for (size_t k = 0; k < size; k++) { - attr_float2[offset + k] = data[k]; - } - attr_float2_offset += size; - } - else if (mattr->type == TypeDesc::TypeMatrix) { - Transform *tfm = mattr->data_transform(); - offset = attr_float3_offset; - - assert(attr_float3.size() >= offset + size * 3); - for (size_t k = 0; k < size * 3; k++) { - attr_float3[offset + k] = (&tfm->x)[k]; - } - attr_float3_offset += size * 3; - } - else { - float4 *data = mattr->data_float4(); - offset = attr_float3_offset; - - assert(attr_float3.size() >= offset + size); - for (size_t k = 0; k < size; k++) { - attr_float3[offset + k] = data[k]; - } - attr_float3_offset += size; - } - - /* mesh vertex/curve index is global, not per object, so we sneak - * a correction for that in here */ - if (mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK && - desc.flags & ATTR_SUBDIVIDED) { - /* indices for subdivided attributes are retrieved - * from patch table so no need for correction here*/ - } - else if (element == ATTR_ELEMENT_VERTEX) - offset -= mesh->vert_offset; - else if (element == ATTR_ELEMENT_VERTEX_MOTION) - offset -= mesh->vert_offset; - else if (element == ATTR_ELEMENT_FACE) { - if (prim == ATTR_PRIM_TRIANGLE) - offset -= mesh->tri_offset; - else - offset -= mesh->face_offset; - } - else if (element == ATTR_ELEMENT_CORNER || element == ATTR_ELEMENT_CORNER_BYTE) { - if (prim == ATTR_PRIM_TRIANGLE) - offset -= 3 * mesh->tri_offset; - else - offset -= mesh->corner_offset; - } - else if (element == ATTR_ELEMENT_CURVE) - offset -= mesh->curve_offset; - else if (element == ATTR_ELEMENT_CURVE_KEY) - offset -= mesh->curvekey_offset; - else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION) - offset -= mesh->curvekey_offset; - } - else { - /* attribute not found */ - desc.element = 
ATTR_ELEMENT_NONE; - desc.offset = 0; - } -} - -void MeshManager::device_update_attributes(Device *device, - DeviceScene *dscene, - Scene *scene, - Progress &progress) -{ - progress.set_status("Updating Mesh", "Computing attributes"); - - /* gather per mesh requested attributes. as meshes may have multiple - * shaders assigned, this merges the requested attributes that have - * been set per shader by the shader manager */ - vector<AttributeRequestSet> mesh_attributes(scene->meshes.size()); - - for (size_t i = 0; i < scene->meshes.size(); i++) { - Mesh *mesh = scene->meshes[i]; - - scene->need_global_attributes(mesh_attributes[i]); - - foreach (Shader *shader, mesh->used_shaders) { - mesh_attributes[i].add(shader->attributes); - } - } - - /* mesh attribute are stored in a single array per data type. here we fill - * those arrays, and set the offset and element type to create attribute - * maps next */ - - /* Pre-allocate attributes to avoid arrays re-allocation which would - * take 2x of overall attribute memory usage. 
- */ - size_t attr_float_size = 0; - size_t attr_float2_size = 0; - size_t attr_float3_size = 0; - size_t attr_uchar4_size = 0; - for (size_t i = 0; i < scene->meshes.size(); i++) { - Mesh *mesh = scene->meshes[i]; - AttributeRequestSet &attributes = mesh_attributes[i]; - foreach (AttributeRequest &req, attributes.requests) { - Attribute *triangle_mattr = mesh->attributes.find(req); - Attribute *curve_mattr = mesh->curve_attributes.find(req); - Attribute *subd_mattr = mesh->subd_attributes.find(req); - - update_attribute_element_size(mesh, - triangle_mattr, - ATTR_PRIM_TRIANGLE, - &attr_float_size, - &attr_float2_size, - &attr_float3_size, - &attr_uchar4_size); - update_attribute_element_size(mesh, - curve_mattr, - ATTR_PRIM_CURVE, - &attr_float_size, - &attr_float2_size, - &attr_float3_size, - &attr_uchar4_size); - update_attribute_element_size(mesh, - subd_mattr, - ATTR_PRIM_SUBD, - &attr_float_size, - &attr_float2_size, - &attr_float3_size, - &attr_uchar4_size); - } - } - - dscene->attributes_float.alloc(attr_float_size); - dscene->attributes_float2.alloc(attr_float2_size); - dscene->attributes_float3.alloc(attr_float3_size); - dscene->attributes_uchar4.alloc(attr_uchar4_size); - - size_t attr_float_offset = 0; - size_t attr_float2_offset = 0; - size_t attr_float3_offset = 0; - size_t attr_uchar4_offset = 0; - - /* Fill in attributes. 
*/ - for (size_t i = 0; i < scene->meshes.size(); i++) { - Mesh *mesh = scene->meshes[i]; - AttributeRequestSet &attributes = mesh_attributes[i]; - - /* todo: we now store std and name attributes from requests even if - * they actually refer to the same mesh attributes, optimize */ - foreach (AttributeRequest &req, attributes.requests) { - Attribute *triangle_mattr = mesh->attributes.find(req); - Attribute *curve_mattr = mesh->curve_attributes.find(req); - Attribute *subd_mattr = mesh->subd_attributes.find(req); - - update_attribute_element_offset(mesh, - dscene->attributes_float, - attr_float_offset, - dscene->attributes_float2, - attr_float2_offset, - dscene->attributes_float3, - attr_float3_offset, - dscene->attributes_uchar4, - attr_uchar4_offset, - triangle_mattr, - ATTR_PRIM_TRIANGLE, - req.triangle_type, - req.triangle_desc); - - update_attribute_element_offset(mesh, - dscene->attributes_float, - attr_float_offset, - dscene->attributes_float2, - attr_float2_offset, - dscene->attributes_float3, - attr_float3_offset, - dscene->attributes_uchar4, - attr_uchar4_offset, - curve_mattr, - ATTR_PRIM_CURVE, - req.curve_type, - req.curve_desc); - - update_attribute_element_offset(mesh, - dscene->attributes_float, - attr_float_offset, - dscene->attributes_float2, - attr_float2_offset, - dscene->attributes_float3, - attr_float3_offset, - dscene->attributes_uchar4, - attr_uchar4_offset, - subd_mattr, - ATTR_PRIM_SUBD, - req.subd_type, - req.subd_desc); - - if (progress.get_cancel()) - return; - } - } - - /* create attribute lookup maps */ - if (scene->shader_manager->use_osl()) - update_osl_attributes(device, scene, mesh_attributes); - - update_svm_attributes(device, dscene, scene, mesh_attributes); - - if (progress.get_cancel()) - return; - - /* copy to device */ - progress.set_status("Updating Mesh", "Copying Attributes to device"); - - if (dscene->attributes_float.size()) { - dscene->attributes_float.copy_to_device(); - } - if (dscene->attributes_float2.size()) { - 
dscene->attributes_float2.copy_to_device(); - } - if (dscene->attributes_float3.size()) { - dscene->attributes_float3.copy_to_device(); - } - if (dscene->attributes_uchar4.size()) { - dscene->attributes_uchar4.copy_to_device(); - } - - if (progress.get_cancel()) - return; - - /* After mesh attributes and patch tables have been copied to device memory, - * we need to update offsets in the objects. */ - scene->object_manager->device_update_mesh_offsets(device, dscene, scene); -} - -void MeshManager::mesh_calc_offset(Scene *scene) -{ - size_t vert_size = 0; - size_t tri_size = 0; - - size_t curve_key_size = 0; - size_t curve_size = 0; - - size_t patch_size = 0; - size_t face_size = 0; - size_t corner_size = 0; - - size_t prim_size = 0; - - foreach (Mesh *mesh, scene->meshes) { - mesh->vert_offset = vert_size; - mesh->tri_offset = tri_size; - - mesh->curvekey_offset = curve_key_size; - mesh->curve_offset = curve_size; - - mesh->patch_offset = patch_size; - mesh->face_offset = face_size; - mesh->corner_offset = corner_size; - - vert_size += mesh->verts.size(); - tri_size += mesh->num_triangles(); - - curve_key_size += mesh->curve_keys.size(); - curve_size += mesh->num_curves(); - - if (mesh->subd_faces.size()) { - Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1]; - patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8; - - /* patch tables are stored in same array so include them in patch_size */ - if (mesh->patch_table) { - mesh->patch_table_offset = patch_size; - patch_size += mesh->patch_table->total_size(); - } - } - face_size += mesh->subd_faces.size(); - corner_size += mesh->subd_face_corners.size(); - - mesh->prim_offset = prim_size; - prim_size += mesh->num_primitives(); - } -} - -void MeshManager::device_update_mesh( - Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress) -{ - /* Count. 
*/ - size_t vert_size = 0; - size_t tri_size = 0; - - size_t curve_key_size = 0; - size_t curve_size = 0; - - size_t patch_size = 0; - - foreach (Mesh *mesh, scene->meshes) { - vert_size += mesh->verts.size(); - tri_size += mesh->num_triangles(); - - curve_key_size += mesh->curve_keys.size(); - curve_size += mesh->num_curves(); - - if (mesh->subd_faces.size()) { - Mesh::SubdFace &last = mesh->subd_faces[mesh->subd_faces.size() - 1]; - patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8; - - /* patch tables are stored in same array so include them in patch_size */ - if (mesh->patch_table) { - mesh->patch_table_offset = patch_size; - patch_size += mesh->patch_table->total_size(); - } - } - } - - /* Create mapping from triangle to primitive triangle array. */ - vector<uint> tri_prim_index(tri_size); - if (for_displacement) { - /* For displacement kernels we do some trickery to make them believe - * we've got all required data ready. However, that data is different - * from final render kernels since we don't have BVH yet, so can't - * really use same semantic of arrays. - */ - foreach (Mesh *mesh, scene->meshes) { - for (size_t i = 0; i < mesh->num_triangles(); ++i) { - tri_prim_index[i + mesh->tri_offset] = 3 * (i + mesh->tri_offset); - } - } - } - else { - for (size_t i = 0; i < dscene->prim_index.size(); ++i) { - if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { - tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i]; - } - } - } - - /* Fill in all the arrays. 
*/ - if (tri_size != 0) { - /* normals */ - progress.set_status("Updating Mesh", "Computing normals"); - - uint *tri_shader = dscene->tri_shader.alloc(tri_size); - float4 *vnormal = dscene->tri_vnormal.alloc(vert_size); - uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size); - uint *tri_patch = dscene->tri_patch.alloc(tri_size); - float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size); - - foreach (Mesh *mesh, scene->meshes) { - mesh->pack_shaders(scene, &tri_shader[mesh->tri_offset]); - mesh->pack_normals(&vnormal[mesh->vert_offset]); - mesh->pack_verts(tri_prim_index, - &tri_vindex[mesh->tri_offset], - &tri_patch[mesh->tri_offset], - &tri_patch_uv[mesh->vert_offset], - mesh->vert_offset, - mesh->tri_offset); - if (progress.get_cancel()) - return; - } - - /* vertex coordinates */ - progress.set_status("Updating Mesh", "Copying Mesh to device"); - - dscene->tri_shader.copy_to_device(); - dscene->tri_vnormal.copy_to_device(); - dscene->tri_vindex.copy_to_device(); - dscene->tri_patch.copy_to_device(); - dscene->tri_patch_uv.copy_to_device(); - } - - if (curve_size != 0) { - progress.set_status("Updating Mesh", "Copying Strands to device"); - - float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size); - float4 *curves = dscene->curves.alloc(curve_size); - - foreach (Mesh *mesh, scene->meshes) { - mesh->pack_curves(scene, - &curve_keys[mesh->curvekey_offset], - &curves[mesh->curve_offset], - mesh->curvekey_offset); - if (progress.get_cancel()) - return; - } - - dscene->curve_keys.copy_to_device(); - dscene->curves.copy_to_device(); - } - - if (patch_size != 0) { - progress.set_status("Updating Mesh", "Copying Patches to device"); - - uint *patch_data = dscene->patches.alloc(patch_size); - - foreach (Mesh *mesh, scene->meshes) { - mesh->pack_patches(&patch_data[mesh->patch_offset], - mesh->vert_offset, - mesh->face_offset, - mesh->corner_offset); - - if (mesh->patch_table) { - mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset], 
- mesh->patch_table_offset); - } - - if (progress.get_cancel()) - return; - } - - dscene->patches.copy_to_device(); - } - - if (for_displacement) { - float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3); - foreach (Mesh *mesh, scene->meshes) { - for (size_t i = 0; i < mesh->num_triangles(); ++i) { - Mesh::Triangle t = mesh->get_triangle(i); - size_t offset = 3 * (i + mesh->tri_offset); - prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]); - prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]); - prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]); - } - } - dscene->prim_tri_verts.copy_to_device(); - } -} - -void MeshManager::device_update_bvh(Device *device, - DeviceScene *dscene, - Scene *scene, - Progress &progress) -{ - /* bvh build */ - progress.set_status("Updating Scene BVH", "Building"); - - BVHParams bparams; - bparams.top_level = true; - bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout, - device->get_bvh_layout_mask()); - bparams.use_spatial_split = scene->params.use_bvh_spatial_split; - bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && - scene->params.use_bvh_unaligned_nodes; - bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps; - bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps; - bparams.bvh_type = scene->params.bvh_type; - bparams.curve_flags = dscene->data.curve.curveflags; - bparams.curve_subdivisions = dscene->data.curve.subdivisions; - - VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout."; - -#ifdef WITH_EMBREE - if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) { - if (dscene->data.bvh.scene) { - BVHEmbree::destroy(dscene->data.bvh.scene); - } - } -#endif - - BVH *bvh = BVH::create(bparams, scene->meshes, scene->objects); - bvh->build(progress, &device->stats); - - if (progress.get_cancel()) { -#ifdef WITH_EMBREE - if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) { - if (dscene->data.bvh.scene) { - 
BVHEmbree::destroy(dscene->data.bvh.scene); - } - } -#endif - delete bvh; - return; - } - - /* copy to device */ - progress.set_status("Updating Scene BVH", "Copying BVH to device"); - - PackedBVH &pack = bvh->pack; - - if (pack.nodes.size()) { - dscene->bvh_nodes.steal_data(pack.nodes); - dscene->bvh_nodes.copy_to_device(); - } - if (pack.leaf_nodes.size()) { - dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes); - dscene->bvh_leaf_nodes.copy_to_device(); - } - if (pack.object_node.size()) { - dscene->object_node.steal_data(pack.object_node); - dscene->object_node.copy_to_device(); - } - if (pack.prim_tri_index.size()) { - dscene->prim_tri_index.steal_data(pack.prim_tri_index); - dscene->prim_tri_index.copy_to_device(); - } - if (pack.prim_tri_verts.size()) { - dscene->prim_tri_verts.steal_data(pack.prim_tri_verts); - dscene->prim_tri_verts.copy_to_device(); - } - if (pack.prim_type.size()) { - dscene->prim_type.steal_data(pack.prim_type); - dscene->prim_type.copy_to_device(); - } - if (pack.prim_visibility.size()) { - dscene->prim_visibility.steal_data(pack.prim_visibility); - dscene->prim_visibility.copy_to_device(); - } - if (pack.prim_index.size()) { - dscene->prim_index.steal_data(pack.prim_index); - dscene->prim_index.copy_to_device(); - } - if (pack.prim_object.size()) { - dscene->prim_object.steal_data(pack.prim_object); - dscene->prim_object.copy_to_device(); - } - if (pack.prim_time.size()) { - dscene->prim_time.steal_data(pack.prim_time); - dscene->prim_time.copy_to_device(); - } - - dscene->data.bvh.root = pack.root_index; - dscene->data.bvh.bvh_layout = bparams.bvh_layout; - dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0); - - bvh->copy_to_device(progress, dscene); - - delete bvh; -} - -void MeshManager::device_update_preprocess(Device *device, Scene *scene, Progress &progress) -{ - if (!need_update && !need_flags_update) { - return; - } - - progress.set_status("Updating Meshes Flags"); - - /* Update flags. 
*/ - bool volume_images_updated = false; - - foreach (Mesh *mesh, scene->meshes) { - mesh->has_volume = false; - - foreach (const Shader *shader, mesh->used_shaders) { - if (shader->has_volume) { - mesh->has_volume = true; - } - if (shader->has_surface_bssrdf) { - mesh->has_surface_bssrdf = true; - } - } - - if (need_update && mesh->has_volume) { - /* Create volume meshes if there is voxel data. */ - if (mesh->has_voxel_attributes()) { - if (!volume_images_updated) { - progress.set_status("Updating Meshes Volume Bounds"); - device_update_volume_images(device, scene, progress); - volume_images_updated = true; - } - - create_volume_mesh(scene, mesh, progress); - } - } - } - - need_flags_update = false; -} - -void MeshManager::device_update_displacement_images(Device *device, - Scene *scene, - Progress &progress) -{ - progress.set_status("Updating Displacement Images"); - TaskPool pool; - ImageManager *image_manager = scene->image_manager; - set<int> bump_images; - foreach (Mesh *mesh, scene->meshes) { - if (mesh->need_update) { - foreach (Shader *shader, mesh->used_shaders) { - if (!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) { - continue; - } - foreach (ShaderNode *node, shader->graph->nodes) { - if (node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) { - continue; - } - - ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode *>(node); - foreach (int slot, image_node->slots) { - if (slot != -1) { - bump_images.insert(slot); - } - } - } - } - } - } - foreach (int slot, bump_images) { - pool.push(function_bind( - &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress)); - } - pool.wait_work(); -} - -void MeshManager::device_update_volume_images(Device *device, Scene *scene, Progress &progress) -{ - progress.set_status("Updating Volume Images"); - TaskPool pool; - ImageManager *image_manager = scene->image_manager; - set<int> volume_images; - - foreach (Mesh *mesh, scene->meshes) { - if 
(!mesh->need_update) { - continue; - } - - foreach (Attribute &attr, mesh->attributes.attributes) { - if (attr.element != ATTR_ELEMENT_VOXEL) { - continue; - } - - VoxelAttribute *voxel = attr.data_voxel(); - - if (voxel->slot != -1) { - volume_images.insert(voxel->slot); - } - } - } - - foreach (int slot, volume_images) { - pool.push(function_bind( - &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress)); - } - pool.wait_work(); -} - -void MeshManager::device_update(Device *device, - DeviceScene *dscene, - Scene *scene, - Progress &progress) -{ - if (!need_update) - return; - - VLOG(1) << "Total " << scene->meshes.size() << " meshes."; - - bool true_displacement_used = false; - size_t total_tess_needed = 0; - - foreach (Mesh *mesh, scene->meshes) { - foreach (Shader *shader, mesh->used_shaders) { - if (shader->need_update_mesh) - mesh->need_update = true; - } - - if (mesh->need_update) { - /* Update normals. */ - mesh->add_face_normals(); - mesh->add_vertex_normals(); - - if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) { - mesh->add_undisplaced(); - } - - /* Test if we need tessellation. */ - if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE && mesh->num_subd_verts == 0 && - mesh->subd_params) { - total_tess_needed++; - } - - /* Test if we need displacement. 
*/ - if (mesh->has_true_displacement()) { - true_displacement_used = true; - } - - if (progress.get_cancel()) - return; - } - } - - /* Tessellate meshes that are using subdivision */ - if (total_tess_needed) { - Camera *dicing_camera = scene->dicing_camera; - dicing_camera->update(scene); - - size_t i = 0; - foreach (Mesh *mesh, scene->meshes) { - if (mesh->need_update && mesh->subdivision_type != Mesh::SUBDIVISION_NONE && - mesh->num_subd_verts == 0 && mesh->subd_params) { - string msg = "Tessellating "; - if (mesh->name == "") - msg += string_printf("%u/%u", (uint)(i + 1), (uint)total_tess_needed); - else - msg += string_printf( - "%s %u/%u", mesh->name.c_str(), (uint)(i + 1), (uint)total_tess_needed); - - progress.set_status("Updating Mesh", msg); - - mesh->subd_params->camera = dicing_camera; - DiagSplit dsplit(*mesh->subd_params); - mesh->tessellate(&dsplit); - - i++; - - if (progress.get_cancel()) - return; - } - } - } - - /* Update images needed for true displacement. */ - bool old_need_object_flags_update = false; - if (true_displacement_used) { - VLOG(1) << "Updating images used for true displacement."; - device_update_displacement_images(device, scene, progress); - old_need_object_flags_update = scene->object_manager->need_flags_update; - scene->object_manager->device_update_flags(device, dscene, scene, progress, false); - } - - /* Device update. */ - device_free(device, dscene); - - mesh_calc_offset(scene); - if (true_displacement_used) { - device_update_mesh(device, dscene, scene, true, progress); - } - if (progress.get_cancel()) - return; - - device_update_attributes(device, dscene, scene, progress); - if (progress.get_cancel()) - return; - - /* Update displacement. 
*/ - bool displacement_done = false; - size_t num_bvh = 0; - BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout, - device->get_bvh_layout_mask()); - - foreach (Mesh *mesh, scene->meshes) { - if (mesh->need_update) { - if (displace(device, dscene, scene, mesh, progress)) { - displacement_done = true; - } - - if (mesh->need_build_bvh(bvh_layout)) { - num_bvh++; - } - } - - if (progress.get_cancel()) - return; - } - - /* Device re-update after displacement. */ - if (displacement_done) { - device_free(device, dscene); - - device_update_attributes(device, dscene, scene, progress); - if (progress.get_cancel()) - return; - } - - TaskPool pool; - - size_t i = 0; - foreach (Mesh *mesh, scene->meshes) { - if (mesh->need_update) { - pool.push(function_bind( - &Mesh::compute_bvh, mesh, device, dscene, &scene->params, &progress, i, num_bvh)); - if (mesh->need_build_bvh(bvh_layout)) { - i++; - } - } - } - - TaskPool::Summary summary; - pool.wait_work(&summary); - VLOG(2) << "Objects BVH build pool statistics:\n" << summary.full_report(); - - foreach (Shader *shader, scene->shaders) { - shader->need_update_mesh = false; - } - - Scene::MotionType need_motion = scene->need_motion(); - bool motion_blur = need_motion == Scene::MOTION_BLUR; - - /* Update objects. */ - vector<Object *> volume_objects; - foreach (Object *object, scene->objects) { - object->compute_bounds(motion_blur); - } - - if (progress.get_cancel()) - return; - - device_update_bvh(device, dscene, scene, progress); - if (progress.get_cancel()) - return; - - device_update_mesh(device, dscene, scene, false, progress); - if (progress.get_cancel()) - return; - - need_update = false; - - if (true_displacement_used) { - /* Re-tag flags for update, so they're re-evaluated - * for meshes with correct bounding boxes. - * - * This wouldn't cause wrong results, just true - * displacement might be less optimal ot calculate. 
- */ - scene->object_manager->need_flags_update = old_need_object_flags_update; - } -} - -void MeshManager::device_free(Device *device, DeviceScene *dscene) -{ - dscene->bvh_nodes.free(); - dscene->bvh_leaf_nodes.free(); - dscene->object_node.free(); - dscene->prim_tri_verts.free(); - dscene->prim_tri_index.free(); - dscene->prim_type.free(); - dscene->prim_visibility.free(); - dscene->prim_index.free(); - dscene->prim_object.free(); - dscene->prim_time.free(); - dscene->tri_shader.free(); - dscene->tri_vnormal.free(); - dscene->tri_vindex.free(); - dscene->tri_patch.free(); - dscene->tri_patch_uv.free(); - dscene->curves.free(); - dscene->curve_keys.free(); - dscene->patches.free(); - dscene->attributes_map.free(); - dscene->attributes_float.free(); - dscene->attributes_float2.free(); - dscene->attributes_float3.free(); - dscene->attributes_uchar4.free(); - - /* Signal for shaders like displacement not to do ray tracing. */ - dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE; - -#ifdef WITH_OSL - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); - - if (og) { - og->object_name_map.clear(); - og->attribute_map.clear(); - og->object_names.clear(); - } -#else - (void)device; -#endif -} - -void MeshManager::tag_update(Scene *scene) -{ - need_update = true; - scene->object_manager->need_update = true; -} - -void MeshManager::collect_statistics(const Scene *scene, RenderStats *stats) -{ - foreach (Mesh *mesh, scene->meshes) { - stats->mesh.geometry.add_entry( - NamedSizeEntry(string(mesh->name.c_str()), mesh->get_total_size_in_bytes())); - } -} - -bool Mesh::need_attribute(Scene *scene, AttributeStandard std) -{ - if (std == ATTR_STD_NONE) - return false; - - if (scene->need_global_attribute(std)) - return true; - - foreach (Shader *shader, used_shaders) - if (shader->attributes.find(std)) - return true; - - return false; -} - -bool Mesh::need_attribute(Scene * /*scene*/, ustring name) -{ - if (name == ustring()) - return false; - - foreach (Shader *shader, used_shaders) 
- if (shader->attributes.find(name)) - return true; - - return false; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index c5be0ba60b9..d0cf4d557aa 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -21,6 +21,7 @@ #include "bvh/bvh_params.h" #include "render/attribute.h" +#include "render/geometry.h" #include "render/shader.h" #include "util/util_array.h" @@ -29,7 +30,6 @@ #include "util/util_map.h" #include "util/util_param.h" #include "util/util_set.h" -#include "util/util_transform.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -51,7 +51,7 @@ struct PackedPatchTable; /* Mesh */ -class Mesh : public Node { +class Mesh : public Geometry { public: NODE_DECLARE @@ -91,94 +91,6 @@ class Mesh : public Node { return triangles.size() / 3; } - /* Mesh Curve */ - struct Curve { - int first_key; - int num_keys; - - int num_segments() const - { - return num_keys - 1; - } - - void bounds_grow(const int k, - const float3 *curve_keys, - const float *curve_radius, - BoundBox &bounds) const; - void bounds_grow(float4 keys[4], BoundBox &bounds) const; - void bounds_grow(const int k, - const float3 *curve_keys, - const float *curve_radius, - const Transform &aligned_space, - BoundBox &bounds) const; - - void motion_keys(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - float time, - size_t k0, - size_t k1, - float4 r_keys[2]) const; - void cardinal_motion_keys(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - float time, - size_t k0, - size_t k1, - size_t k2, - size_t k3, - float4 r_keys[4]) const; - - void keys_for_step(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - size_t step, - size_t k0, - size_t k1, - float4 r_keys[2]) const; - void 
cardinal_keys_for_step(const float3 *curve_keys, - const float *curve_radius, - const float3 *key_steps, - size_t num_curve_keys, - size_t num_steps, - size_t step, - size_t k0, - size_t k1, - size_t k2, - size_t k3, - float4 r_keys[4]) const; - }; - - Curve get_curve(size_t i) const - { - int first = curve_first_key[i]; - int next_first = (i + 1 < curve_first_key.size()) ? curve_first_key[i + 1] : curve_keys.size(); - - Curve curve = {first, next_first - first}; - return curve; - } - - size_t num_curves() const - { - return curve_first_key.size(); - } - - size_t num_segments() const - { - return curve_keys.size() - curve_first_key.size(); - } - - size_t num_primitives() const - { - return num_triangles() + num_segments(); - } - /* Mesh SubdFace */ struct SubdFace { int start_corner; @@ -212,14 +124,6 @@ class Mesh : public Node { SubdivisionType subdivision_type; /* Mesh Data */ - enum GeometryFlags { - GEOMETRY_NONE = 0, - GEOMETRY_TRIANGLES = (1 << 0), - GEOMETRY_CURVES = (1 << 1), - }; - int geometry_flags; /* used to distinguish meshes with no verts - and meshed for which geometry is not created */ - array<int> triangles; array<float3> verts; array<int> shader; @@ -229,14 +133,9 @@ class Mesh : public Node { array<int> triangle_patch; /* must be < 0 for non subd triangles */ array<float2> vert_patch_uv; - float volume_isovalue; - bool has_volume; /* Set in the device_update_flags(). */ - bool has_surface_bssrdf; /* Set in the device_update_flags(). 
*/ - - array<float3> curve_keys; - array<float> curve_radius; - array<int> curve_first_key; - array<int> curve_shader; + float volume_clipping; + float volume_step_size; + bool volume_object_space; array<SubdFace> subd_faces; array<int> subd_face_corners; @@ -246,42 +145,18 @@ class Mesh : public Node { SubdParams *subd_params; - vector<Shader *> used_shaders; - AttributeSet attributes; - AttributeSet curve_attributes; AttributeSet subd_attributes; - BoundBox bounds; - bool transform_applied; - bool transform_negative_scaled; - Transform transform_normal; - PackedPatchTable *patch_table; - uint motion_steps; - bool use_motion_blur; - - /* Update Flags */ - bool need_update; - bool need_update_rebuild; - /* BVH */ - BVH *bvh; - size_t tri_offset; size_t vert_offset; - size_t curve_offset; - size_t curvekey_offset; - size_t patch_offset; size_t patch_table_offset; size_t face_offset; size_t corner_offset; - size_t attr_map_offset; - - size_t prim_offset; - size_t num_subd_verts; private: @@ -289,7 +164,7 @@ class Mesh : public Node { unordered_multimap<int, int> vert_stitching_map; /* stitching index -> multiple real vert indices */ friend class DiagSplit; - friend class MeshManager; + friend class GeometryManager; public: /* Functions */ @@ -298,24 +173,24 @@ class Mesh : public Node { void resize_mesh(int numverts, int numfaces); void reserve_mesh(int numverts, int numfaces); - void resize_curves(int numcurves, int numkeys); - void reserve_curves(int numcurves, int numkeys); void resize_subd_faces(int numfaces, int num_ngons, int numcorners); void reserve_subd_faces(int numfaces, int num_ngons, int numcorners); - void clear(bool preserve_voxel_data = false); + void clear(bool preserve_voxel_data); + void clear() override; void add_vertex(float3 P); void add_vertex_slow(float3 P); void add_triangle(int v0, int v1, int v2, int shader, bool smooth); - void add_curve_key(float3 loc, float radius); - void add_curve(int first_key, int shader); void add_subd_face(int 
*corners, int num_corners, int shader_, bool smooth_); - void compute_bounds(); + void copy_center_to_motion_step(const int motion_step); + + void compute_bounds() override; + void apply_transform(const Transform &tfm, const bool apply_to_motion) override; void add_face_normals(); void add_vertex_normals(); void add_undisplaced(); - void get_uv_tiles(ustring map, unordered_set<int> &tiles); + void get_uv_tiles(ustring map, unordered_set<int> &tiles) override; void pack_shaders(Scene *scene, uint *shader); void pack_normals(float4 *vnormal); @@ -325,103 +200,11 @@ class Mesh : public Node { float2 *tri_patch_uv, size_t vert_offset, size_t tri_offset); - void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset); void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset); - void compute_bvh(Device *device, - DeviceScene *dscene, - SceneParams *params, - Progress *progress, - int n, - int total); - - bool need_attribute(Scene *scene, AttributeStandard std); - bool need_attribute(Scene *scene, ustring name); - - void tag_update(Scene *scene, bool rebuild); - - bool has_motion_blur() const; - bool has_true_displacement() const; - bool has_voxel_attributes() const; - - /* Convert between normalized -1..1 motion time and index - * in the VERTEX_MOTION attribute. */ - float motion_time(int step) const; - int motion_step(float time) const; - - /* Check whether the mesh should have own BVH built separately. Briefly, - * own BVH is needed for mesh, if: - * - * - It is instanced multiple times, so each instance object should share the - * same BVH tree. - * - Special ray intersection is needed, for example to limit subsurface rays - * to only the mesh itself. - * - The BVH layout requires the top level to only contain instances. - */ - bool need_build_bvh(BVHLayout layout) const; - - /* Check if the mesh should be treated as instanced. 
*/ - bool is_instanced() const; - void tessellate(DiagSplit *split); }; -/* Mesh Manager */ - -class MeshManager { - public: - bool need_update; - bool need_flags_update; - - MeshManager(); - ~MeshManager(); - - bool displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress); - - /* attributes */ - void update_osl_attributes(Device *device, - Scene *scene, - vector<AttributeRequestSet> &mesh_attributes); - void update_svm_attributes(Device *device, - DeviceScene *dscene, - Scene *scene, - vector<AttributeRequestSet> &mesh_attributes); - - void device_update_preprocess(Device *device, Scene *scene, Progress &progress); - void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); - - void device_free(Device *device, DeviceScene *dscene); - - void tag_update(Scene *scene); - - void create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progress); - - void collect_statistics(const Scene *scene, RenderStats *stats); - - protected: - /* Calculate verts/triangles/curves offsets in global arrays. 
*/ - void mesh_calc_offset(Scene *scene); - - void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); - - void device_update_mesh(Device *device, - DeviceScene *dscene, - Scene *scene, - bool for_displacement, - Progress &progress); - - void device_update_attributes(Device *device, - DeviceScene *dscene, - Scene *scene, - Progress &progress); - - void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); - - void device_update_displacement_images(Device *device, Scene *scene, Progress &progress); - - void device_update_volume_images(Device *device, Scene *scene, Progress &progress); -}; - CCL_NAMESPACE_END #endif /* __MESH_H__ */ diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index 6a6c2fbb3eb..467810f9273 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -43,7 +43,7 @@ static float3 compute_face_normal(const Mesh::Triangle &t, float3 *verts) return norm / normlen; } -bool MeshManager::displace( +bool GeometryManager::displace( Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress) { /* verify if we have a displacement shader */ @@ -58,7 +58,7 @@ bool MeshManager::displace( size_t object_index = OBJECT_NONE; for (size_t i = 0; i < scene->objects.size(); i++) { - if (scene->objects[i]->mesh == mesh) { + if (scene->objects[i]->geometry == mesh) { object_index = i; break; } @@ -91,7 +91,7 @@ bool MeshManager::displace( /* set up object, primitive and barycentric coordinates */ int object = object_index; - int prim = mesh->tri_offset + i; + int prim = mesh->prim_offset + i; float u, v; switch (j) { diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp index 40dd658eadd..3d72b2fab91 100644 --- a/intern/cycles/render/mesh_subdivision.cpp +++ b/intern/cycles/render/mesh_subdivision.cpp @@ -14,16 +14,16 @@ * limitations under the 
License. */ -#include "render/mesh.h" #include "render/attribute.h" #include "render/camera.h" +#include "render/mesh.h" -#include "subd/subd_split.h" #include "subd/subd_patch.h" #include "subd/subd_patch_table.h" +#include "subd/subd_split.h" -#include "util/util_foreach.h" #include "util/util_algorithm.h" +#include "util/util_foreach.h" #include "util/util_hash.h" CCL_NAMESPACE_BEGIN @@ -32,10 +32,10 @@ CCL_NAMESPACE_BEGIN CCL_NAMESPACE_END -# include <opensubdiv/far/topologyRefinerFactory.h> -# include <opensubdiv/far/primvarRefiner.h> -# include <opensubdiv/far/patchTableFactory.h> # include <opensubdiv/far/patchMap.h> +# include <opensubdiv/far/patchTableFactory.h> +# include <opensubdiv/far/primvarRefiner.h> +# include <opensubdiv/far/topologyRefinerFactory.h> /* specializations of TopologyRefinerFactory for ccl::Mesh */ diff --git a/intern/cycles/render/mesh_volume.cpp b/intern/cycles/render/mesh_volume.cpp index f451b58e92a..d73ba3b06dd 100644 --- a/intern/cycles/render/mesh_volume.cpp +++ b/intern/cycles/render/mesh_volume.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "render/mesh.h" #include "render/attribute.h" +#include "render/mesh.h" #include "render/scene.h" #include "util/util_foreach.h" @@ -362,7 +362,7 @@ struct VoxelAttributeGrid { int channels; }; -void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progress) +void GeometryManager::create_volume_mesh(Mesh *mesh, Progress &progress) { string msg = string_printf("Computing Volume Mesh %s", mesh->name.c_str()); progress.set_status("Updating Mesh", msg); @@ -373,13 +373,15 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres VolumeParams volume_params; volume_params.resolution = make_int3(0, 0, 0); + Transform transform = transform_identity(); + foreach (Attribute &attr, mesh->attributes.attributes) { if (attr.element != ATTR_ELEMENT_VOXEL) { continue; } - VoxelAttribute *voxel = attr.data_voxel(); - device_memory *image_memory = scene->image_manager->image_memory(voxel->slot); + ImageHandle &handle = attr.data_voxel(); + device_texture *image_memory = handle.image_memory(); int3 resolution = make_int3( image_memory->data_width, image_memory->data_height, image_memory->data_depth); @@ -387,14 +389,20 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres volume_params.resolution = resolution; } else if (volume_params.resolution != resolution) { - VLOG(1) << "Can't create volume mesh, all voxel grid resolutions must be equal\n"; - return; + /* TODO: support this as it's common for OpenVDB. */ + VLOG(1) << "Can't create accurate volume mesh, all voxel grid resolutions must be equal\n"; + continue; } VoxelAttributeGrid voxel_grid; voxel_grid.data = static_cast<float *>(image_memory->host_pointer); voxel_grid.channels = image_memory->data_elements; voxel_grids.push_back(voxel_grid); + + /* TODO: support multiple transforms. 
*/ + if (image_memory->info.use_transform_3d) { + transform = image_memory->info.transform_3d; + } } if (voxel_grids.empty()) { @@ -427,17 +435,14 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres } /* Compute start point and cell size from transform. */ - Attribute *attr = mesh->attributes.find(ATTR_STD_GENERATED_TRANSFORM); const int3 resolution = volume_params.resolution; float3 start_point = make_float3(0.0f, 0.0f, 0.0f); float3 cell_size = make_float3(1.0f / resolution.x, 1.0f / resolution.y, 1.0f / resolution.z); - if (attr) { - const Transform *tfm = attr->data_transform(); - const Transform itfm = transform_inverse(*tfm); - start_point = transform_point(&itfm, start_point); - cell_size = transform_direction(&itfm, cell_size); - } + /* TODO: support arbitrary transforms, not just scale + translate. */ + const Transform itfm = transform_inverse(transform); + start_point = transform_point(&itfm, start_point); + cell_size = transform_direction(&itfm, cell_size); volume_params.start_point = start_point; volume_params.cell_size = cell_size; @@ -445,7 +450,7 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres /* Build bounding mesh around non-empty volume cells. 
*/ VolumeMeshBuilder builder(&volume_params); - const float isovalue = mesh->volume_isovalue; + const float clipping = mesh->volume_clipping; for (int z = 0; z < resolution.z; ++z) { for (int y = 0; y < resolution.y; ++y) { @@ -457,7 +462,7 @@ void MeshManager::create_volume_mesh(Scene *scene, Mesh *mesh, Progress &progres const int channels = voxel_grid.channels; for (int c = 0; c < channels; c++) { - if (voxel_grid.data[voxel_index * channels + c] >= isovalue) { + if (voxel_grid.data[voxel_index * channels + c] >= clipping) { builder.add_node_with_padding(x, y, z); break; } diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index bdab2a99897..ac07d91c4ca 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -14,27 +14,28 @@ * limitations under the License. */ +#include "render/nodes.h" #include "render/colorspace.h" +#include "render/constant_fold.h" #include "render/film.h" #include "render/image.h" #include "render/integrator.h" #include "render/light.h" #include "render/mesh.h" -#include "render/nodes.h" +#include "render/osl.h" #include "render/scene.h" #include "render/svm.h" -#include "kernel/svm/svm_color_util.h" -#include "kernel/svm/svm_ramp_util.h" -#include "kernel/svm/svm_math_util.h" -#include "kernel/svm/svm_mapping_util.h" -#include "render/osl.h" -#include "render/constant_fold.h" -#include "util/util_sky_model.h" #include "util/util_foreach.h" #include "util/util_logging.h" +#include "util/util_sky_model.h" #include "util/util_transform.h" +#include "kernel/svm/svm_color_util.h" +#include "kernel/svm/svm_mapping_util.h" +#include "kernel/svm/svm_math_util.h" +#include "kernel/svm/svm_ramp_util.h" + CCL_NAMESPACE_BEGIN /* Texture Mapping */ @@ -205,27 +206,6 @@ void TextureMapping::compile(OSLCompiler &compiler) /* Image Texture */ -ImageSlotTextureNode::~ImageSlotTextureNode() -{ - if (image_manager) { - foreach (int slot, slots) { - if (slot != -1) { - image_manager->remove_image(slot); - } 
- } - } -} - -void ImageSlotTextureNode::add_image_user() const -{ - /* Increase image user count for new node. */ - foreach (int slot, slots) { - if (slot != -1) { - image_manager->add_image_user(slot); - } - } -} - NODE_DEFINE(ImageTextureNode) { NodeType *type = NodeType::add("image_texture", create, NodeType::SHADER); @@ -275,18 +255,27 @@ NODE_DEFINE(ImageTextureNode) ImageTextureNode::ImageTextureNode() : ImageSlotTextureNode(node_type) { - is_float = false; - compress_as_srgb = false; colorspace = u_colorspace_raw; - builtin_data = NULL; animated = false; tiles.push_back(1001); } ShaderNode *ImageTextureNode::clone() const { - add_image_user(); - return new ImageTextureNode(*this); + ImageTextureNode *node = new ImageTextureNode(*this); + node->handle = handle; + return node; +} + +ImageParams ImageTextureNode::image_params() const +{ + ImageParams params; + params.animated = animated; + params.interpolation = interpolation; + params.extension = extension; + params.alpha_type = alpha_type; + params.colorspace = colorspace; + return params; } void ImageTextureNode::cull_tiles(Scene *scene, ShaderGraph *graph) @@ -333,10 +322,10 @@ void ImageTextureNode::cull_tiles(Scene *scene, ShaderGraph *graph) /* TODO(lukas): This is quite inefficient. A fairly simple improvement would * be to have a cache in each mesh that is indexed by attribute. * Additionally, building a graph-to-meshes list once could help. 
*/ - foreach (Mesh *mesh, scene->meshes) { - foreach (Shader *shader, mesh->used_shaders) { + foreach (Geometry *geom, scene->geometry) { + foreach (Shader *shader, geom->used_shaders) { if (shader->graph == graph) { - mesh->get_uv_tiles(attribute, used_tiles); + geom->get_uv_tiles(attribute, used_tiles); } } } @@ -371,123 +360,80 @@ void ImageTextureNode::compile(SVMCompiler &compiler) ShaderOutput *color_out = output("Color"); ShaderOutput *alpha_out = output("Alpha"); - image_manager = compiler.scene->image_manager; - if (slots.empty()) { + if (handle.empty()) { cull_tiles(compiler.scene, compiler.current_graph); - slots.reserve(tiles.size()); - - bool have_metadata = false; - foreach (int tile, tiles) { - string tile_name = filename.string(); - string_replace(tile_name, "<UDIM>", string_printf("%04d", tile)); - - ImageMetaData metadata; - int slot = image_manager->add_image(tile_name, - builtin_data, - animated, - 0, - interpolation, - extension, - alpha_type, - colorspace, - metadata); - slots.push_back(slot); - - /* We assume that all tiles have the same metadata. */ - if (!have_metadata) { - is_float = metadata.is_float; - compress_as_srgb = metadata.compress_as_srgb; - known_colorspace = metadata.colorspace; - have_metadata = true; - } - } + ImageManager *image_manager = compiler.scene->image_manager; + handle = image_manager->add_image(filename.string(), image_params(), tiles); } - bool has_image = false; - foreach (int slot, slots) { - if (slot != -1) { - has_image = true; - break; - } - } + /* All tiles have the same metadata. 
*/ + const ImageMetaData metadata = handle.metadata(); + const bool compress_as_srgb = metadata.compress_as_srgb; + const ustring known_colorspace = metadata.colorspace; - if (has_image) { - int vector_offset = tex_mapping.compile_begin(compiler, vector_in); - uint flags = 0; + int vector_offset = tex_mapping.compile_begin(compiler, vector_in); + uint flags = 0; - if (compress_as_srgb) { - flags |= NODE_IMAGE_COMPRESS_AS_SRGB; + if (compress_as_srgb) { + flags |= NODE_IMAGE_COMPRESS_AS_SRGB; + } + if (!alpha_out->links.empty()) { + const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || + alpha_type == IMAGE_ALPHA_CHANNEL_PACKED || + alpha_type == IMAGE_ALPHA_IGNORE); + + if (unassociate_alpha) { + flags |= NODE_IMAGE_ALPHA_UNASSOCIATE; } - if (!alpha_out->links.empty()) { - const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || - alpha_type == IMAGE_ALPHA_CHANNEL_PACKED || - alpha_type == IMAGE_ALPHA_IGNORE); + } - if (unassociate_alpha) { - flags |= NODE_IMAGE_ALPHA_UNASSOCIATE; - } + if (projection != NODE_IMAGE_PROJ_BOX) { + /* If there only is one image (a very common case), we encode it as a negative value. */ + int num_nodes; + if (handle.num_tiles() == 1) { + num_nodes = -handle.svm_slot(); + } + else { + num_nodes = divide_up(handle.num_tiles(), 2); } - if (projection != NODE_IMAGE_PROJ_BOX) { - /* If there only is one image (a very common case), we encode it as a negative value. 
*/ - int num_nodes; - if (slots.size() == 1) { - num_nodes = -slots[0]; - } - else { - num_nodes = divide_up(slots.size(), 2); - } + compiler.add_node(NODE_TEX_IMAGE, + num_nodes, + compiler.encode_uchar4(vector_offset, + compiler.stack_assign_if_linked(color_out), + compiler.stack_assign_if_linked(alpha_out), + flags), + projection); - compiler.add_node(NODE_TEX_IMAGE, - num_nodes, - compiler.encode_uchar4(vector_offset, - compiler.stack_assign_if_linked(color_out), - compiler.stack_assign_if_linked(alpha_out), - flags), - projection); - - if (num_nodes > 0) { - for (int i = 0; i < num_nodes; i++) { - int4 node; - node.x = tiles[2 * i]; - node.y = slots[2 * i]; - if (2 * i + 1 < slots.size()) { - node.z = tiles[2 * i + 1]; - node.w = slots[2 * i + 1]; - } - else { - node.z = -1; - node.w = -1; - } - compiler.add_node(node.x, node.y, node.z, node.w); + if (num_nodes > 0) { + for (int i = 0; i < num_nodes; i++) { + int4 node; + node.x = tiles[2 * i]; + node.y = handle.svm_slot(2 * i); + if (2 * i + 1 < tiles.size()) { + node.z = tiles[2 * i + 1]; + node.w = handle.svm_slot(2 * i + 1); } + else { + node.z = -1; + node.w = -1; + } + compiler.add_node(node.x, node.y, node.z, node.w); } } - else { - assert(slots.size() == 1); - compiler.add_node(NODE_TEX_IMAGE_BOX, - slots[0], - compiler.encode_uchar4(vector_offset, - compiler.stack_assign_if_linked(color_out), - compiler.stack_assign_if_linked(alpha_out), - flags), - __float_as_int(projection_blend)); - } - - tex_mapping.compile_end(compiler, vector_in, vector_offset); } else { - /* image not found */ - if (!color_out->links.empty()) { - compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out)); - compiler.add_node( - NODE_VALUE_V, - make_float3(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B)); - } - if (!alpha_out->links.empty()) - compiler.add_node( - NODE_VALUE_F, __float_as_int(TEX_IMAGE_MISSING_A), compiler.stack_assign(alpha_out)); + assert(handle.num_tiles() == 1); + 
compiler.add_node(NODE_TEX_IMAGE_BOX, + handle.svm_slot(), + compiler.encode_uchar4(vector_offset, + compiler.stack_assign_if_linked(color_out), + compiler.stack_assign_if_linked(alpha_out), + flags), + __float_as_int(projection_blend)); } + + tex_mapping.compile_end(compiler, vector_in, vector_offset); } void ImageTextureNode::compile(OSLCompiler &compiler) @@ -496,38 +442,22 @@ void ImageTextureNode::compile(OSLCompiler &compiler) tex_mapping.compile(compiler); - image_manager = compiler.scene->image_manager; - if (slots.size() == 0) { - ImageMetaData metadata; - if (builtin_data == NULL) { - string tile_name = filename.string(); - string_replace(tile_name, "<UDIM>", "1001"); - image_manager->get_image_metadata(tile_name, NULL, colorspace, metadata); - slots.push_back(-1); - } - else { - int slot = image_manager->add_image(filename.string(), - builtin_data, - animated, - 0, - interpolation, - extension, - alpha_type, - colorspace, - metadata); - slots.push_back(slot); - } - is_float = metadata.is_float; - compress_as_srgb = metadata.compress_as_srgb; - known_colorspace = metadata.colorspace; + if (handle.empty()) { + ImageManager *image_manager = compiler.scene->image_manager; + handle = image_manager->add_image(filename.string(), image_params()); } - if (slots[0] == -1) { + const ImageMetaData metadata = handle.metadata(); + const bool is_float = metadata.is_float(); + const bool compress_as_srgb = metadata.compress_as_srgb; + const ustring known_colorspace = metadata.colorspace; + + if (handle.svm_slot() == -1) { compiler.parameter_texture( "filename", filename, compress_as_srgb ? 
u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", slots[0]); + compiler.parameter_texture("filename", handle.svm_slot()); } const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || @@ -589,17 +519,26 @@ NODE_DEFINE(EnvironmentTextureNode) EnvironmentTextureNode::EnvironmentTextureNode() : ImageSlotTextureNode(node_type) { - is_float = false; - compress_as_srgb = false; colorspace = u_colorspace_raw; - builtin_data = NULL; animated = false; } ShaderNode *EnvironmentTextureNode::clone() const { - add_image_user(); - return new EnvironmentTextureNode(*this); + EnvironmentTextureNode *node = new EnvironmentTextureNode(*this); + node->handle = handle; + return node; +} + +ImageParams EnvironmentTextureNode::image_params() const +{ + ImageParams params; + params.animated = animated; + params.interpolation = interpolation; + params.extension = EXTENSION_REPEAT; + params.alpha_type = alpha_type; + params.colorspace = colorspace; + return params; } void EnvironmentTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes) @@ -621,93 +560,53 @@ void EnvironmentTextureNode::compile(SVMCompiler &compiler) ShaderOutput *color_out = output("Color"); ShaderOutput *alpha_out = output("Alpha"); - image_manager = compiler.scene->image_manager; - if (slots.empty()) { - ImageMetaData metadata; - int slot = image_manager->add_image(filename.string(), - builtin_data, - animated, - 0, - interpolation, - EXTENSION_REPEAT, - alpha_type, - colorspace, - metadata); - slots.push_back(slot); - is_float = metadata.is_float; - compress_as_srgb = metadata.compress_as_srgb; - known_colorspace = metadata.colorspace; - } - - if (slots[0] != -1) { - int vector_offset = tex_mapping.compile_begin(compiler, vector_in); - uint flags = 0; - - if (compress_as_srgb) { - flags |= NODE_IMAGE_COMPRESS_AS_SRGB; - } + if (handle.empty()) { + ImageManager *image_manager = compiler.scene->image_manager; + handle = 
image_manager->add_image(filename.string(), image_params()); + } - compiler.add_node(NODE_TEX_ENVIRONMENT, - slots[0], - compiler.encode_uchar4(vector_offset, - compiler.stack_assign_if_linked(color_out), - compiler.stack_assign_if_linked(alpha_out), - flags), - projection); + const ImageMetaData metadata = handle.metadata(); + const bool compress_as_srgb = metadata.compress_as_srgb; + const ustring known_colorspace = metadata.colorspace; - tex_mapping.compile_end(compiler, vector_in, vector_offset); - } - else { - /* image not found */ - if (!color_out->links.empty()) { - compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out)); - compiler.add_node( - NODE_VALUE_V, - make_float3(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B)); - } - if (!alpha_out->links.empty()) - compiler.add_node( - NODE_VALUE_F, __float_as_int(TEX_IMAGE_MISSING_A), compiler.stack_assign(alpha_out)); + int vector_offset = tex_mapping.compile_begin(compiler, vector_in); + uint flags = 0; + + if (compress_as_srgb) { + flags |= NODE_IMAGE_COMPRESS_AS_SRGB; } + + compiler.add_node(NODE_TEX_ENVIRONMENT, + handle.svm_slot(), + compiler.encode_uchar4(vector_offset, + compiler.stack_assign_if_linked(color_out), + compiler.stack_assign_if_linked(alpha_out), + flags), + projection); + + tex_mapping.compile_end(compiler, vector_in, vector_offset); } void EnvironmentTextureNode::compile(OSLCompiler &compiler) { + if (handle.empty()) { + ImageManager *image_manager = compiler.scene->image_manager; + handle = image_manager->add_image(filename.string(), image_params()); + } + tex_mapping.compile(compiler); - /* See comments in ImageTextureNode::compile about support - * of builtin images. 
- */ - image_manager = compiler.scene->image_manager; - if (slots.empty()) { - ImageMetaData metadata; - if (builtin_data == NULL) { - image_manager->get_image_metadata(filename.string(), NULL, colorspace, metadata); - slots.push_back(-1); - } - else { - int slot = image_manager->add_image(filename.string(), - builtin_data, - animated, - 0, - interpolation, - EXTENSION_REPEAT, - alpha_type, - colorspace, - metadata); - slots.push_back(slot); - } - is_float = metadata.is_float; - compress_as_srgb = metadata.compress_as_srgb; - known_colorspace = metadata.colorspace; - } + const ImageMetaData metadata = handle.metadata(); + const bool is_float = metadata.is_float(); + const bool compress_as_srgb = metadata.compress_as_srgb; + const ustring known_colorspace = metadata.colorspace; - if (slots[0] == -1) { + if (handle.svm_slot() == -1) { compiler.parameter_texture( "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", slots[0]); + compiler.parameter_texture("filename", handle.svm_slot()); } compiler.parameter(this, "projection"); @@ -1350,7 +1249,7 @@ NODE_DEFINE(MusgraveTextureNode) SOCKET_IN_FLOAT(scale, "Scale", 1.0f); SOCKET_IN_FLOAT(detail, "Detail", 2.0f); SOCKET_IN_FLOAT(dimension, "Dimension", 2.0f); - SOCKET_IN_FLOAT(lacunarity, "Lacunarity", 1.0f); + SOCKET_IN_FLOAT(lacunarity, "Lacunarity", 2.0f); SOCKET_IN_FLOAT(offset, "Offset", 0.0f); SOCKET_IN_FLOAT(gain, "Gain", 1.0f); @@ -1422,15 +1321,33 @@ NODE_DEFINE(WaveTextureNode) type_enum.insert("rings", NODE_WAVE_RINGS); SOCKET_ENUM(type, "Type", type_enum, NODE_WAVE_BANDS); + static NodeEnum bands_direction_enum; + bands_direction_enum.insert("x", NODE_WAVE_BANDS_DIRECTION_X); + bands_direction_enum.insert("y", NODE_WAVE_BANDS_DIRECTION_Y); + bands_direction_enum.insert("z", NODE_WAVE_BANDS_DIRECTION_Z); + bands_direction_enum.insert("diagonal", NODE_WAVE_BANDS_DIRECTION_DIAGONAL); + SOCKET_ENUM( + bands_direction, "Bands Direction", 
bands_direction_enum, NODE_WAVE_BANDS_DIRECTION_X); + + static NodeEnum rings_direction_enum; + rings_direction_enum.insert("x", NODE_WAVE_RINGS_DIRECTION_X); + rings_direction_enum.insert("y", NODE_WAVE_RINGS_DIRECTION_Y); + rings_direction_enum.insert("z", NODE_WAVE_RINGS_DIRECTION_Z); + rings_direction_enum.insert("spherical", NODE_WAVE_RINGS_DIRECTION_SPHERICAL); + SOCKET_ENUM( + rings_direction, "Rings Direction", rings_direction_enum, NODE_WAVE_BANDS_DIRECTION_X); + static NodeEnum profile_enum; profile_enum.insert("sine", NODE_WAVE_PROFILE_SIN); profile_enum.insert("saw", NODE_WAVE_PROFILE_SAW); + profile_enum.insert("tri", NODE_WAVE_PROFILE_TRI); SOCKET_ENUM(profile, "Profile", profile_enum, NODE_WAVE_PROFILE_SIN); SOCKET_IN_FLOAT(scale, "Scale", 1.0f); SOCKET_IN_FLOAT(distortion, "Distortion", 0.0f); SOCKET_IN_FLOAT(detail, "Detail", 2.0f); SOCKET_IN_FLOAT(detail_scale, "Detail Scale", 0.0f); + SOCKET_IN_FLOAT(phase, "Phase Offset", 0.0f); SOCKET_IN_POINT( vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED); @@ -1446,32 +1363,36 @@ WaveTextureNode::WaveTextureNode() : TextureNode(node_type) void WaveTextureNode::compile(SVMCompiler &compiler) { + ShaderInput *vector_in = input("Vector"); ShaderInput *scale_in = input("Scale"); ShaderInput *distortion_in = input("Distortion"); - ShaderInput *dscale_in = input("Detail Scale"); ShaderInput *detail_in = input("Detail"); - ShaderInput *vector_in = input("Vector"); - ShaderOutput *fac_out = output("Fac"); + ShaderInput *dscale_in = input("Detail Scale"); + ShaderInput *phase_in = input("Phase Offset"); ShaderOutput *color_out = output("Color"); + ShaderOutput *fac_out = output("Fac"); int vector_offset = tex_mapping.compile_begin(compiler, vector_in); compiler.add_node(NODE_TEX_WAVE, - compiler.encode_uchar4(type, - compiler.stack_assign_if_linked(color_out), - compiler.stack_assign_if_linked(fac_out), - compiler.stack_assign_if_linked(dscale_in)), + compiler.encode_uchar4(type, 
bands_direction, rings_direction, profile), compiler.encode_uchar4(vector_offset, compiler.stack_assign_if_linked(scale_in), - compiler.stack_assign_if_linked(detail_in), - compiler.stack_assign_if_linked(distortion_in)), - profile); + compiler.stack_assign_if_linked(distortion_in), + compiler.stack_assign_if_linked(detail_in)), + compiler.encode_uchar4(compiler.stack_assign_if_linked(dscale_in), + compiler.stack_assign_if_linked(phase_in), + compiler.stack_assign_if_linked(color_out), + compiler.stack_assign_if_linked(fac_out))); compiler.add_node(__float_as_int(scale), __float_as_int(detail), __float_as_int(distortion), __float_as_int(detail_scale)); + compiler.add_node( + __float_as_int(phase), SVM_STACK_INVALID, SVM_STACK_INVALID, SVM_STACK_INVALID); + tex_mapping.compile_end(compiler, vector_in, vector_offset); } @@ -1480,6 +1401,8 @@ void WaveTextureNode::compile(OSLCompiler &compiler) tex_mapping.compile(compiler); compiler.parameter(this, "type"); + compiler.parameter(this, "bands_direction"); + compiler.parameter(this, "rings_direction"); compiler.parameter(this, "profile"); compiler.add(this, "node_wave_texture"); @@ -1722,21 +1645,10 @@ NODE_DEFINE(PointDensityTextureNode) PointDensityTextureNode::PointDensityTextureNode() : ShaderNode(node_type) { - image_manager = NULL; - slot = -1; - builtin_data = NULL; } PointDensityTextureNode::~PointDensityTextureNode() { - if (image_manager) { - image_manager->remove_image(filename.string(), - builtin_data, - interpolation, - EXTENSION_CLIP, - IMAGE_ALPHA_AUTO, - ustring()); - } } ShaderNode *PointDensityTextureNode::clone() const @@ -1744,10 +1656,9 @@ ShaderNode *PointDensityTextureNode::clone() const /* Increase image user count for new node. We need to ensure to not call * add_image again, to work around access of freed data on the Blender * side. A better solution should be found to avoid this. 
*/ - if (slot != -1) { - image_manager->add_image_user(slot); - } - return new PointDensityTextureNode(*this); + PointDensityTextureNode *node = new PointDensityTextureNode(*this); + node->handle = handle; /* TODO: not needed? */ + return node; } void PointDensityTextureNode::attributes(Shader *shader, AttributeRequestSet *attributes) @@ -1758,20 +1669,11 @@ void PointDensityTextureNode::attributes(Shader *shader, AttributeRequestSet *at ShaderNode::attributes(shader, attributes); } -void PointDensityTextureNode::add_image() +ImageParams PointDensityTextureNode::image_params() const { - if (slot == -1) { - ImageMetaData metadata; - slot = image_manager->add_image(filename.string(), - builtin_data, - false, - 0, - interpolation, - EXTENSION_CLIP, - IMAGE_ALPHA_AUTO, - u_colorspace_raw, - metadata); - } + ImageParams params; + params.interpolation = interpolation; + return params; } void PointDensityTextureNode::compile(SVMCompiler &compiler) @@ -1783,11 +1685,13 @@ void PointDensityTextureNode::compile(SVMCompiler &compiler) const bool use_density = !density_out->links.empty(); const bool use_color = !color_out->links.empty(); - image_manager = compiler.scene->image_manager; - if (use_density || use_color) { - add_image(); + if (handle.empty()) { + ImageManager *image_manager = compiler.scene->image_manager; + handle = image_manager->add_image(filename.string(), image_params()); + } + const int slot = handle.svm_slot(); if (slot != -1) { compiler.stack_assign(vector_in); compiler.add_node(NODE_TEX_VOXEL, @@ -1824,12 +1728,13 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler) const bool use_density = !density_out->links.empty(); const bool use_color = !color_out->links.empty(); - image_manager = compiler.scene->image_manager; - if (use_density || use_color) { - add_image(); + if (handle.empty()) { + ImageManager *image_manager = compiler.scene->image_manager; + handle = image_manager->add_image(filename.string(), image_params()); + } - 
compiler.parameter_texture("filename", slot); + compiler.parameter_texture("filename", handle.svm_slot()); if (space == NODE_TEX_VOXEL_SPACE_WORLD) { compiler.parameter("mapping", tfm); compiler.parameter("use_mapping", 1); @@ -3343,7 +3248,7 @@ NODE_DEFINE(PrincipledVolumeNode) SOCKET_IN_COLOR(emission_color, "Emission Color", make_float3(1.0f, 1.0f, 1.0f)); SOCKET_IN_FLOAT(blackbody_intensity, "Blackbody Intensity", 0.0f); SOCKET_IN_COLOR(blackbody_tint, "Blackbody Tint", make_float3(1.0f, 1.0f, 1.0f)); - SOCKET_IN_FLOAT(temperature, "Temperature", 1500.0f); + SOCKET_IN_FLOAT(temperature, "Temperature", 1000.0f); SOCKET_IN_FLOAT(volume_mix_weight, "VolumeMixWeight", 0.0f, SocketType::SVM_INTERNAL); SOCKET_OUT_CLOSURE(volume, "Volume"); @@ -3354,6 +3259,8 @@ NODE_DEFINE(PrincipledVolumeNode) PrincipledVolumeNode::PrincipledVolumeNode() : VolumeNode(node_type) { closure = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID; + density_attribute = ustring("density"); + temperature_attribute = ustring("temperature"); } void PrincipledVolumeNode::attributes(Shader *shader, AttributeRequestSet *attributes) @@ -4495,7 +4402,10 @@ VertexColorNode::VertexColorNode() : ShaderNode(node_type) void VertexColorNode::attributes(Shader *shader, AttributeRequestSet *attributes) { if (!(output("Color")->links.empty() && output("Alpha")->links.empty())) { - attributes->add_standard(layer_name); + if (layer_name != "") + attributes->add_standard(layer_name); + else + attributes->add(ATTR_STD_VERTEX_COLOR); } ShaderNode::attributes(shader, attributes); } @@ -4504,7 +4414,14 @@ void VertexColorNode::compile(SVMCompiler &compiler) { ShaderOutput *color_out = output("Color"); ShaderOutput *alpha_out = output("Alpha"); - int layer_id = compiler.attribute(layer_name); + int layer_id = 0; + + if (layer_name != "") { + layer_id = compiler.attribute(layer_name); + } + else { + layer_id = compiler.attribute(ATTR_STD_VERTEX_COLOR); + } ShaderNodeType node; @@ -4531,7 +4448,19 @@ void 
VertexColorNode::compile(OSLCompiler &compiler) else { compiler.parameter("bump_offset", "center"); } - compiler.parameter("layer_name", layer_name.c_str()); + + if (layer_name.empty()) { + compiler.parameter("layer_name", ustring("geom:vertex_color")); + } + else { + if (Attribute::name_standard(layer_name.c_str()) != ATTR_STD_NONE) { + compiler.parameter("name", (string("geom:") + layer_name.c_str()).c_str()); + } + else { + compiler.parameter("layer_name", layer_name.c_str()); + } + } + compiler.add(this, "node_vertex_color"); } @@ -6022,14 +5951,20 @@ NODE_DEFINE(VectorMathNode) type_enum.insert("floor", NODE_VECTOR_MATH_FLOOR); type_enum.insert("ceil", NODE_VECTOR_MATH_CEIL); type_enum.insert("modulo", NODE_VECTOR_MATH_MODULO); + type_enum.insert("wrap", NODE_VECTOR_MATH_WRAP); type_enum.insert("fraction", NODE_VECTOR_MATH_FRACTION); type_enum.insert("absolute", NODE_VECTOR_MATH_ABSOLUTE); type_enum.insert("minimum", NODE_VECTOR_MATH_MINIMUM); type_enum.insert("maximum", NODE_VECTOR_MATH_MAXIMUM); + + type_enum.insert("sine", NODE_VECTOR_MATH_SINE); + type_enum.insert("cosine", NODE_VECTOR_MATH_COSINE); + type_enum.insert("tangent", NODE_VECTOR_MATH_TANGENT); SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_MATH_ADD); SOCKET_IN_VECTOR(vector1, "Vector1", make_float3(0.0f, 0.0f, 0.0f)); SOCKET_IN_VECTOR(vector2, "Vector2", make_float3(0.0f, 0.0f, 0.0f)); + SOCKET_IN_VECTOR(vector3, "Vector3", make_float3(0.0f, 0.0f, 0.0f)); SOCKET_IN_FLOAT(scale, "Scale", 1.0f); SOCKET_OUT_FLOAT(value, "Value"); @@ -6048,7 +5983,7 @@ void VectorMathNode::constant_fold(const ConstantFolder &folder) float3 vector = make_float3(0.0f, 0.0f, 0.0f); if (folder.all_inputs_constant()) { - svm_vector_math(&value, &vector, type, vector1, vector2, scale); + svm_vector_math(&value, &vector, type, vector1, vector2, vector3, scale); if (folder.output == output("Value")) { folder.make_constant(value); } @@ -6075,11 +6010,24 @@ void VectorMathNode::compile(SVMCompiler &compiler) int 
value_stack_offset = compiler.stack_assign_if_linked(value_out); int vector_stack_offset = compiler.stack_assign_if_linked(vector_out); - compiler.add_node( - NODE_VECTOR_MATH, - type, - compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset), - compiler.encode_uchar4(value_stack_offset, vector_stack_offset)); + /* 3 Vector Operators */ + if (type == NODE_VECTOR_MATH_WRAP) { + ShaderInput *vector3_in = input("Vector3"); + int vector3_stack_offset = compiler.stack_assign(vector3_in); + compiler.add_node( + NODE_VECTOR_MATH, + type, + compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset), + compiler.encode_uchar4(value_stack_offset, vector_stack_offset)); + compiler.add_node(vector3_stack_offset); + } + else { + compiler.add_node( + NODE_VECTOR_MATH, + type, + compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset), + compiler.encode_uchar4(value_stack_offset, vector_stack_offset)); + } } void VectorMathNode::compile(OSLCompiler &compiler) @@ -6088,6 +6036,62 @@ void VectorMathNode::compile(OSLCompiler &compiler) compiler.add(this, "node_vector_math"); } +/* Vector Rotate */ + +NODE_DEFINE(VectorRotateNode) +{ + NodeType *type = NodeType::add("vector_rotate", create, NodeType::SHADER); + + static NodeEnum type_enum; + type_enum.insert("axis", NODE_VECTOR_ROTATE_TYPE_AXIS); + type_enum.insert("x_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_X); + type_enum.insert("y_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_Y); + type_enum.insert("z_axis", NODE_VECTOR_ROTATE_TYPE_AXIS_Z); + type_enum.insert("euler_xyz", NODE_VECTOR_ROTATE_TYPE_EULER_XYZ); + SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_ROTATE_TYPE_AXIS); + + SOCKET_BOOLEAN(invert, "Invert", false); + + SOCKET_IN_VECTOR(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f)); + SOCKET_IN_POINT(rotation, "Rotation", make_float3(0.0f, 0.0f, 0.0f)); + SOCKET_IN_POINT(center, "Center", make_float3(0.0f, 0.0f, 0.0f)); + SOCKET_IN_VECTOR(axis, "Axis", 
make_float3(0.0f, 0.0f, 1.0f)); + SOCKET_IN_FLOAT(angle, "Angle", 0.0f); + SOCKET_OUT_VECTOR(vector, "Vector"); + + return type; +} + +VectorRotateNode::VectorRotateNode() : ShaderNode(node_type) +{ +} + +void VectorRotateNode::compile(SVMCompiler &compiler) +{ + ShaderInput *vector_in = input("Vector"); + ShaderInput *rotation_in = input("Rotation"); + ShaderInput *center_in = input("Center"); + ShaderInput *axis_in = input("Axis"); + ShaderInput *angle_in = input("Angle"); + ShaderOutput *vector_out = output("Vector"); + + compiler.add_node( + NODE_VECTOR_ROTATE, + compiler.encode_uchar4( + type, compiler.stack_assign(vector_in), compiler.stack_assign(rotation_in), invert), + compiler.encode_uchar4(compiler.stack_assign(center_in), + compiler.stack_assign(axis_in), + compiler.stack_assign(angle_in)), + compiler.stack_assign(vector_out)); +} + +void VectorRotateNode::compile(OSLCompiler &compiler) +{ + compiler.parameter(this, "type"); + compiler.parameter(this, "invert"); + compiler.add(this, "node_vector_rotate"); +} + /* VectorTransform */ NODE_DEFINE(VectorTransformNode) diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index a8fe7644957..e201118574b 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -17,8 +17,9 @@ #ifndef __NODES_H__ #define __NODES_H__ -#include "render/graph.h" #include "graph/node.h" +#include "render/graph.h" +#include "render/image.h" #include "util/util_array.h" #include "util/util_string.h" @@ -77,12 +78,15 @@ class ImageSlotTextureNode : public TextureNode { explicit ImageSlotTextureNode(const NodeType *node_type) : TextureNode(node_type) { special_type = SHADER_SPECIAL_TYPE_IMAGE_SLOT; - image_manager = NULL; } - ~ImageSlotTextureNode(); - void add_image_user() const; - ImageManager *image_manager; - vector<int> slots; + + virtual bool equals(const ShaderNode &other) + { + const ImageSlotTextureNode &other_node = (const ImageSlotTextureNode &)other; + return 
TextureNode::equals(other) && handle == other_node.handle; + } + + ImageHandle handle; }; class ImageTextureNode : public ImageSlotTextureNode { @@ -97,14 +101,14 @@ class ImageTextureNode : public ImageSlotTextureNode { virtual bool equals(const ShaderNode &other) { - const ImageTextureNode &image_node = (const ImageTextureNode &)other; - return ImageSlotTextureNode::equals(other) && builtin_data == image_node.builtin_data && - animated == image_node.animated; + const ImageTextureNode &other_node = (const ImageTextureNode &)other; + return ImageSlotTextureNode::equals(other) && animated == other_node.animated; } + ImageParams image_params() const; + /* Parameters. */ ustring filename; - void *builtin_data; ustring colorspace; ImageAlphaType alpha_type; NodeImageProjection projection; @@ -115,11 +119,6 @@ class ImageTextureNode : public ImageSlotTextureNode { float3 vector; ccl::vector<int> tiles; - /* Runtime. */ - bool is_float; - bool compress_as_srgb; - ustring known_colorspace; - protected: void cull_tiles(Scene *scene, ShaderGraph *graph); }; @@ -140,25 +139,20 @@ class EnvironmentTextureNode : public ImageSlotTextureNode { virtual bool equals(const ShaderNode &other) { - const EnvironmentTextureNode &env_node = (const EnvironmentTextureNode &)other; - return ImageSlotTextureNode::equals(other) && builtin_data == env_node.builtin_data && - animated == env_node.animated; + const EnvironmentTextureNode &other_node = (const EnvironmentTextureNode &)other; + return ImageSlotTextureNode::equals(other) && animated == other_node.animated; } + ImageParams image_params() const; + /* Parameters. */ ustring filename; - void *builtin_data; ustring colorspace; ImageAlphaType alpha_type; NodeEnvironmentProjection projection; InterpolationType interpolation; bool animated; float3 vector; - - /* Runtime. 
*/ - bool is_float; - bool compress_as_srgb; - ustring known_colorspace; }; class SkyTextureNode : public TextureNode { @@ -203,6 +197,11 @@ class OutputAOVNode : public ShaderNode { ustring name; + virtual int get_group() + { + return NODE_GROUP_LEVEL_4; + } + /* Don't allow output node de-duplication. */ virtual bool equals(const ShaderNode & /*other*/) { @@ -288,9 +287,11 @@ class WaveTextureNode : public TextureNode { } NodeWaveType type; + NodeWaveBandsDirection bands_direction; + NodeWaveRingsDirection rings_direction; NodeWaveProfile profile; - float scale, distortion, detail, detail_scale; + float scale, distortion, detail, detail_scale, phase; float3 vector; }; @@ -343,7 +344,7 @@ class PointDensityTextureNode : public ShaderNode { SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode) virtual int get_group() { - return NODE_GROUP_LEVEL_3; + return NODE_GROUP_LEVEL_4; } ~PointDensityTextureNode(); @@ -363,24 +364,22 @@ class PointDensityTextureNode : public ShaderNode { return true; } - void add_image(); - /* Parameters. */ ustring filename; NodeTexVoxelSpace space; InterpolationType interpolation; Transform tfm; float3 vector; - void *builtin_data; /* Runtime. 
*/ - ImageManager *image_manager; - int slot; + ImageHandle handle; + + ImageParams image_params() const; virtual bool equals(const ShaderNode &other) { - const PointDensityTextureNode &point_dendity_node = (const PointDensityTextureNode &)other; - return ShaderNode::equals(other) && builtin_data == point_dendity_node.builtin_data; + const PointDensityTextureNode &other_node = (const PointDensityTextureNode &)other; + return ShaderNode::equals(other) && handle == other_node.handle; } }; @@ -1377,10 +1376,28 @@ class VectorMathNode : public ShaderNode { float3 vector1; float3 vector2; + float3 vector3; float scale; NodeVectorMathType type; }; +class VectorRotateNode : public ShaderNode { + public: + SHADER_NODE_CLASS(VectorRotateNode) + + virtual int get_group() + { + return NODE_GROUP_LEVEL_3; + } + NodeVectorRotateType type; + bool invert; + float3 vector; + float3 center; + float3 axis; + float angle; + float3 rotation; +}; + class VectorTransformNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorTransformNode) diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 849329a086d..90a1d90019d 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -14,22 +14,24 @@ * limitations under the License. 
*/ -#include "render/camera.h" +#include "render/object.h" #include "device/device.h" +#include "render/camera.h" +#include "render/curves.h" +#include "render/hair.h" +#include "render/integrator.h" #include "render/light.h" #include "render/mesh.h" -#include "render/curves.h" -#include "render/object.h" #include "render/particles.h" #include "render/scene.h" #include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_map.h" +#include "util/util_murmurhash.h" #include "util/util_progress.h" #include "util/util_set.h" #include "util/util_vector.h" -#include "util/util_murmurhash.h" #include "subd/subd_patch_table.h" @@ -64,6 +66,7 @@ struct UpdateObjectTransformState { KernelObject *objects; Transform *object_motion_pass; DecomposedTransform *object_motion; + float *object_volume_step; /* Flags which will be synchronized to Integrator. */ bool have_motion; @@ -87,7 +90,7 @@ NODE_DEFINE(Object) { NodeType *type = NodeType::add("object", create); - SOCKET_NODE(mesh, "Mesh", &Mesh::node_type); + SOCKET_NODE(geometry, "Geometry", &Geometry::node_base_type); SOCKET_TRANSFORM(tfm, "Transform", transform_identity()); SOCKET_UINT(visibility, "Visibility", ~0); SOCKET_COLOR(color, "Color", make_float3(0.0f, 0.0f, 0.0f)); @@ -152,7 +155,7 @@ void Object::update_motion() void Object::compute_bounds(bool motion_blur) { - BoundBox mbounds = mesh->bounds; + BoundBox mbounds = geometry->bounds; if (motion_blur && use_motion()) { array<DecomposedTransform> decomp(motion.size()); @@ -172,7 +175,7 @@ void Object::compute_bounds(bool motion_blur) } else { /* No motion blur case. 
*/ - if (mesh->transform_applied) { + if (geometry->transform_applied) { bounds = mbounds; } else { @@ -183,89 +186,18 @@ void Object::compute_bounds(bool motion_blur) void Object::apply_transform(bool apply_to_motion) { - if (!mesh || tfm == transform_identity()) + if (!geometry || tfm == transform_identity()) return; - /* triangles */ - if (mesh->verts.size()) { - /* store matrix to transform later. when accessing these as attributes we - * do not want the transform to be applied for consistency between static - * and dynamic BVH, so we do it on packing. */ - mesh->transform_normal = transform_transposed_inverse(tfm); - - /* apply to mesh vertices */ - for (size_t i = 0; i < mesh->verts.size(); i++) - mesh->verts[i] = transform_point(&tfm, mesh->verts[i]); - - if (apply_to_motion) { - Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - - if (attr) { - size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1); - float3 *vert_steps = attr->data_float3(); - - for (size_t i = 0; i < steps_size; i++) - vert_steps[i] = transform_point(&tfm, vert_steps[i]); - } - - Attribute *attr_N = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); - - if (attr_N) { - Transform ntfm = mesh->transform_normal; - size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1); - float3 *normal_steps = attr_N->data_float3(); - - for (size_t i = 0; i < steps_size; i++) - normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i])); - } - } - } - - /* curves */ - if (mesh->curve_keys.size()) { - /* compute uniform scale */ - float3 c0 = transform_get_column(&tfm, 0); - float3 c1 = transform_get_column(&tfm, 1); - float3 c2 = transform_get_column(&tfm, 2); - float scalar = powf(fabsf(dot(cross(c0, c1), c2)), 1.0f / 3.0f); - - /* apply transform to curve keys */ - for (size_t i = 0; i < mesh->curve_keys.size(); i++) { - float3 co = transform_point(&tfm, mesh->curve_keys[i]); - float radius = mesh->curve_radius[i] * scalar; - - /* scale for 
curve radius is only correct for uniform scale */ - mesh->curve_keys[i] = co; - mesh->curve_radius[i] = radius; - } - - if (apply_to_motion) { - Attribute *curve_attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - - if (curve_attr) { - /* apply transform to motion curve keys */ - size_t steps_size = mesh->curve_keys.size() * (mesh->motion_steps - 1); - float4 *key_steps = curve_attr->data_float4(); - - for (size_t i = 0; i < steps_size; i++) { - float3 co = transform_point(&tfm, float4_to_float3(key_steps[i])); - float radius = key_steps[i].w * scalar; - - /* scale for curve radius is only correct for uniform scale */ - key_steps[i] = float3_to_float4(co); - key_steps[i].w = radius; - } - } - } - } + geometry->apply_transform(tfm, apply_to_motion); /* we keep normals pointing in same direction on negative scale, notify - * mesh about this in it (re)calculates normals */ + * geometry about this in it (re)calculates normals */ if (transform_negative_scale(tfm)) - mesh->transform_negative_scaled = true; + geometry->transform_negative_scaled = true; if (bounds.valid()) { - mesh->compute_bounds(); + geometry->compute_bounds(); compute_bounds(false); } @@ -275,11 +207,11 @@ void Object::apply_transform(bool apply_to_motion) void Object::tag_update(Scene *scene) { - if (mesh) { - if (mesh->transform_applied) - mesh->need_update = true; + if (geometry) { + if (geometry->transform_applied) + geometry->need_update = true; - foreach (Shader *shader, mesh->used_shaders) { + foreach (Shader *shader, geometry->used_shaders) { if (shader->use_mis && shader->has_surface_emission) scene->light_manager->need_update = true; } @@ -287,7 +219,7 @@ void Object::tag_update(Scene *scene) scene->camera->need_flags_update = true; scene->curve_system_manager->need_update = true; - scene->mesh_manager->need_update = true; + scene->geometry_manager->need_update = true; scene->object_manager->need_update = true; } @@ -336,6 +268,82 @@ uint Object::visibility_for_tracing() 
const return trace_visibility; } +float Object::compute_volume_step_size() const +{ + if (geometry->type != Geometry::MESH) { + return FLT_MAX; + } + + Mesh *mesh = static_cast<Mesh *>(geometry); + + if (!mesh->has_volume) { + return FLT_MAX; + } + + /* Compute step rate from shaders. */ + float step_rate = FLT_MAX; + + foreach (Shader *shader, mesh->used_shaders) { + if (shader->has_volume) { + if ((shader->heterogeneous_volume && shader->has_volume_spatial_varying) || + (shader->has_volume_attribute_dependency)) { + step_rate = fminf(shader->volume_step_rate, step_rate); + } + } + } + + if (step_rate == FLT_MAX) { + return FLT_MAX; + } + + /* Compute step size from voxel grids. */ + float step_size = FLT_MAX; + + foreach (Attribute &attr, mesh->attributes.attributes) { + if (attr.element == ATTR_ELEMENT_VOXEL) { + ImageHandle &handle = attr.data_voxel(); + const ImageMetaData &metadata = handle.metadata(); + if (metadata.width == 0 || metadata.height == 0 || metadata.depth == 0) { + continue; + } + + /* User specified step size. */ + float voxel_step_size = mesh->volume_step_size; + + if (voxel_step_size == 0.0f) { + /* Auto detect step size. */ + float3 size = make_float3( + 1.0f / metadata.width, 1.0f / metadata.height, 1.0f / metadata.depth); + + /* Step size is transformed from voxel to world space. */ + Transform voxel_tfm = tfm; + if (metadata.use_transform_3d) { + voxel_tfm = tfm * transform_inverse(metadata.transform_3d); + } + voxel_step_size = min3(fabs(transform_direction(&voxel_tfm, size))); + } + else if (mesh->volume_object_space) { + /* User specified step size in object space. */ + float3 size = make_float3(voxel_step_size, voxel_step_size, voxel_step_size); + voxel_step_size = min3(fabs(transform_direction(&tfm, size))); + } + + if (voxel_step_size > 0.0f) { + step_size = fminf(voxel_step_size, step_size); + } + } + } + + if (step_size == FLT_MAX) { + /* Fall back to 1/10th of bounds for procedural volumes. 
*/ + step_size = 0.1f * average(bounds.size()); + } + + step_size *= step_rate; + + return step_size; +} + int Object::get_device_index() const { return index; @@ -353,32 +361,33 @@ ObjectManager::~ObjectManager() { } -void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state, Object *ob) +static float object_surface_area(UpdateObjectTransformState *state, + const Transform &tfm, + Geometry *geom) { - KernelObject &kobject = state->objects[ob->index]; - Transform *object_motion_pass = state->object_motion_pass; - - Mesh *mesh = ob->mesh; - uint flag = 0; + if (geom->type != Geometry::MESH) { + return 0.0f; + } - /* Compute transformations. */ - Transform tfm = ob->tfm; - Transform itfm = transform_inverse(tfm); + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->has_volume) { + /* Volume density automatically adjust to object scale. */ + if (mesh->volume_object_space) { + const float3 unit = normalize(make_float3(1.0f, 1.0f, 1.0f)); + return 1.0f / len(transform_direction(&tfm, unit)); + } + else { + return 1.0f; + } + } /* Compute surface area. for uniform scale we can do avoid the many * transform calls and share computation for instances. * * TODO(brecht): Correct for displacement, and move to a better place. */ - float uniform_scale; float surface_area = 0.0f; - float3 color = ob->color; - float pass_id = ob->pass_id; - float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF); - int particle_index = (ob->particle_system) ? 
- ob->particle_index + state->particle_offset[ob->particle_system] : - 0; - + float uniform_scale; if (transform_uniform_scale(tfm, uniform_scale)) { map<Mesh *, float>::iterator it; @@ -424,9 +433,31 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s } } + return surface_area; +} + +void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state, Object *ob) +{ + KernelObject &kobject = state->objects[ob->index]; + Transform *object_motion_pass = state->object_motion_pass; + + Geometry *geom = ob->geometry; + uint flag = 0; + + /* Compute transformations. */ + Transform tfm = ob->tfm; + Transform itfm = transform_inverse(tfm); + + float3 color = ob->color; + float pass_id = ob->pass_id; + float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF); + int particle_index = (ob->particle_system) ? + ob->particle_index + state->particle_offset[ob->particle_system] : + 0; + kobject.tfm = tfm; kobject.itfm = itfm; - kobject.surface_area = surface_area; + kobject.surface_area = object_surface_area(state, tfm, geom); kobject.color[0] = color.x; kobject.color[1] = color.y; kobject.color[2] = color.z; @@ -435,11 +466,16 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s kobject.particle_index = particle_index; kobject.motion_offset = 0; - if (mesh->use_motion_blur) { + if (geom->use_motion_blur) { state->have_motion = true; } - if (mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { - flag |= SD_OBJECT_HAS_VERTEX_MOTION; + + if (geom->type == Geometry::MESH) { + /* TODO: why only mesh? 
*/ + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { + flag |= SD_OBJECT_HAS_VERTEX_MOTION; + } } if (state->need_motion == Scene::MOTION_PASS) { @@ -460,7 +496,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s /* Motion transformations, is world/object space depending if mesh * comes with deformed position in object space, or if we transform * the shading point in world space. */ - if (!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { + if (!(flag & SD_OBJECT_HAS_VERTEX_MOTION)) { tfm_pre = tfm_pre * itfm; tfm_post = tfm_post * itfm; } @@ -485,12 +521,13 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s kobject.dupli_generated[0] = ob->dupli_generated[0]; kobject.dupli_generated[1] = ob->dupli_generated[1]; kobject.dupli_generated[2] = ob->dupli_generated[2]; - kobject.numkeys = mesh->curve_keys.size(); + kobject.numkeys = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom)->curve_keys.size() : + 0; kobject.dupli_uv[0] = ob->dupli_uv[0]; kobject.dupli_uv[1] = ob->dupli_uv[1]; - int totalsteps = mesh->motion_steps; + int totalsteps = geom->motion_steps; kobject.numsteps = (totalsteps - 1) / 2; - kobject.numverts = mesh->verts.size(); + kobject.numverts = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom)->verts.size() : 0; kobject.patch_map_offset = 0; kobject.attribute_map_offset = 0; uint32_t hash_name = util_murmur_hash3(ob->name.c_str(), ob->name.length(), 0); @@ -503,9 +540,10 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s flag |= SD_OBJECT_HOLDOUT_MASK; } state->object_flag[ob->index] = flag; + state->object_volume_step[ob->index] = FLT_MAX; /* Have curves. 
*/ - if (mesh->num_curves()) { + if (geom->type == Geometry::HAIR) { state->have_curves = true; } } @@ -556,6 +594,7 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, state.objects = dscene->objects.alloc(scene->objects.size()); state.object_flag = dscene->object_flag.alloc(scene->objects.size()); + state.object_volume_step = dscene->object_volume_step.alloc(scene->objects.size()); state.object_motion = NULL; state.object_motion_pass = NULL; @@ -676,25 +715,30 @@ void ObjectManager::device_update_flags( /* Object info flag. */ uint *object_flag = dscene->object_flag.data(); + float *object_volume_step = dscene->object_volume_step.data(); /* Object volume intersection. */ vector<Object *> volume_objects; bool has_volume_objects = false; foreach (Object *object, scene->objects) { - if (object->mesh->has_volume) { + if (object->geometry->has_volume) { if (bounds_valid) { volume_objects.push_back(object); } has_volume_objects = true; + object_volume_step[object->index] = object->compute_volume_step_size(); + } + else { + object_volume_step[object->index] = FLT_MAX; } } foreach (Object *object, scene->objects) { - if (object->mesh->has_volume) { + if (object->geometry->has_volume) { object_flag[object->index] |= SD_OBJECT_HAS_VOLUME; object_flag[object->index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES; - foreach (Attribute &attr, object->mesh->attributes.attributes) { + foreach (Attribute &attr, object->geometry->attributes.attributes) { if (attr.element == ATTR_ELEMENT_VOXEL) { object_flag[object->index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES; } @@ -703,6 +747,7 @@ void ObjectManager::device_update_flags( else { object_flag[object->index] &= ~(SD_OBJECT_HAS_VOLUME | SD_OBJECT_HAS_VOLUME_ATTRIBUTES); } + if (object->is_shadow_catcher) { object_flag[object->index] |= SD_OBJECT_SHADOW_CATCHER; } @@ -731,6 +776,7 @@ void ObjectManager::device_update_flags( /* Copy object flag. 
*/ dscene->object_flag.copy_to_device(); + dscene->object_volume_step.copy_to_device(); } void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene) @@ -744,21 +790,24 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc bool update = false; foreach (Object *object, scene->objects) { - Mesh *mesh = object->mesh; - - if (mesh->patch_table) { - uint patch_map_offset = 2 * (mesh->patch_table_offset + mesh->patch_table->total_size() - - mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - - mesh->patch_offset; - - if (kobjects[object->index].patch_map_offset != patch_map_offset) { - kobjects[object->index].patch_map_offset = patch_map_offset; - update = true; + Geometry *geom = object->geometry; + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->patch_table) { + uint patch_map_offset = 2 * (mesh->patch_table_offset + mesh->patch_table->total_size() - + mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - + mesh->patch_offset; + + if (kobjects[object->index].patch_map_offset != patch_map_offset) { + kobjects[object->index].patch_map_offset = patch_map_offset; + update = true; + } } } - if (kobjects[object->index].attribute_map_offset != mesh->attr_map_offset) { - kobjects[object->index].attribute_map_offset = mesh->attr_map_offset; + if (kobjects[object->index].attribute_map_offset != geom->attr_map_offset) { + kobjects[object->index].attribute_map_offset = geom->attr_map_offset; update = true; } } @@ -774,15 +823,16 @@ void ObjectManager::device_free(Device *, DeviceScene *dscene) dscene->object_motion_pass.free(); dscene->object_motion.free(); dscene->object_flag.free(); + dscene->object_volume_step.free(); } void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress &progress) { /* todo: normals and displacement should be done before applying transform! */ - /* todo: create objects/meshes in right order! 
*/ + /* todo: create objects/geometry in right order! */ - /* counter mesh users */ - map<Mesh *, int> mesh_users; + /* counter geometry users */ + map<Geometry *, int> geometry_users; Scene::MotionType need_motion = scene->need_motion(); bool motion_blur = need_motion == Scene::MOTION_BLUR; bool apply_to_motion = need_motion != Scene::MOTION_PASS; @@ -790,10 +840,10 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P bool have_instancing = false; foreach (Object *object, scene->objects) { - map<Mesh *, int>::iterator it = mesh_users.find(object->mesh); + map<Geometry *, int>::iterator it = geometry_users.find(object->geometry); - if (it == mesh_users.end()) - mesh_users[object->mesh] = 1; + if (it == geometry_users.end()) + geometry_users[object->geometry] = 1; else it->second++; } @@ -803,27 +853,34 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P uint *object_flag = dscene->object_flag.data(); - /* apply transforms for objects with single user meshes */ + /* apply transforms for objects with single user geometry */ foreach (Object *object, scene->objects) { /* Annoying feedback loop here: we can't use is_instanced() because * it'll use uninitialized transform_applied flag. * - * Could be solved by moving reference counter to Mesh. + * Could be solved by moving reference counter to Geometry. 
*/ - if ((mesh_users[object->mesh] == 1 && !object->mesh->has_surface_bssrdf) && - !object->mesh->has_true_displacement() && - object->mesh->subdivision_type == Mesh::SUBDIVISION_NONE) { + Geometry *geom = object->geometry; + bool apply = (geometry_users[geom] == 1) && !geom->has_surface_bssrdf && + !geom->has_true_displacement(); + + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); + apply = apply && mesh->subdivision_type == Mesh::SUBDIVISION_NONE; + } + + if (apply) { if (!(motion_blur && object->use_motion())) { - if (!object->mesh->transform_applied) { + if (!geom->transform_applied) { object->apply_transform(apply_to_motion); - object->mesh->transform_applied = true; + geom->transform_applied = true; if (progress.get_cancel()) return; } object_flag[i] |= SD_OBJECT_TRANSFORM_APPLIED; - if (object->mesh->transform_negative_scaled) + if (geom->transform_negative_scaled) object_flag[i] |= SD_OBJECT_NEGATIVE_SCALE_APPLIED; } else @@ -842,7 +899,7 @@ void ObjectManager::tag_update(Scene *scene) { need_update = true; scene->curve_system_manager->need_update = true; - scene->mesh_manager->need_update = true; + scene->geometry_manager->need_update = true; scene->light_manager->need_update = true; } diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index cbbff0d4c6d..7c84c2de4fb 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -23,8 +23,8 @@ #include "util/util_array.h" #include "util/util_boundbox.h" #include "util/util_param.h" -#include "util/util_transform.h" #include "util/util_thread.h" +#include "util/util_transform.h" #include "util/util_types.h" #include "util/util_vector.h" @@ -32,7 +32,7 @@ CCL_NAMESPACE_BEGIN class Device; class DeviceScene; -class Mesh; +class Geometry; class ParticleSystem; class Progress; class Scene; @@ -46,7 +46,7 @@ class Object : public Node { public: NODE_DECLARE - Mesh *mesh; + Geometry *geometry; Transform tfm; BoundBox bounds; uint random_id; 
@@ -81,6 +81,9 @@ class Object : public Node { int motion_step(float time) const; void update_motion(); + /* Maximum number of motion steps supported (due to Embree). */ + static const uint MAX_MOTION_STEPS = 129; + /* Check whether object is traceable and it worth adding it to * kernel scene. */ @@ -94,6 +97,9 @@ class Object : public Node { /* Returns the index that is used in the kernel for this object. */ int get_device_index() const; + /* Compute step size from attributes, shaders, transforms. */ + float compute_volume_step_size() const; + protected: /* Specifies the position of the object in scene->objects and * in the device vectors. Gets set in device_update. */ diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 1f0a243e6c1..06d832a29ca 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -20,10 +20,10 @@ #include "render/colorspace.h" #include "render/graph.h" #include "render/light.h" +#include "render/nodes.h" #include "render/osl.h" #include "render/scene.h" #include "render/shader.h" -#include "render/nodes.h" #ifdef WITH_OSL @@ -102,8 +102,8 @@ void OSLShaderManager::device_update(Device *device, device_free(device, dscene, scene); - /* determine which shaders are in use */ - device_update_shaders_used(scene); + /* set texture system */ + scene->image_manager->set_osl_texture_system((void *)ts); /* create shaders */ OSLGlobals *og = (OSLGlobals *)device->osl_memory(); @@ -142,9 +142,6 @@ void OSLShaderManager::device_update(Device *device, need_update = false; - /* set texture system */ - scene->image_manager->set_osl_texture_system((void *)ts); - /* add special builtin texture types */ services->textures.insert(ustring("@ao"), new OSLTextureHandle(OSLTextureHandle::AO)); services->textures.insert(ustring("@bevel"), new OSLTextureHandle(OSLTextureHandle::BEVEL)); @@ -319,7 +316,7 @@ bool OSLShaderManager::osl_compile(const string &inputfile, const string &output string include_path_arg = 
string("-I") + shader_path; options.push_back(include_path_arg); - stdosl_path = path_get("shader/stdosl.h"); + stdosl_path = path_get("shader/stdcycles.h"); /* compile */ OSL::OSLCompiler *compiler = new OSL::OSLCompiler(&OSL::ErrorHandler::default_handler()); @@ -440,27 +437,35 @@ const char *OSLShaderManager::shader_load_bytecode(const string &hash, const str return loaded_shaders.find(hash)->first.c_str(); } -OSLNode *OSLShaderManager::osl_node(const std::string &filepath, +/* This is a static function to avoid RTTI link errors with only this + * file being compiled without RTTI to match OSL and LLVM libraries. */ +OSLNode *OSLShaderManager::osl_node(ShaderManager *manager, + const std::string &filepath, const std::string &bytecode_hash, const std::string &bytecode) { + if (!manager->use_osl()) { + return NULL; + } + /* create query */ + OSLShaderManager *osl_manager = static_cast<OSLShaderManager *>(manager); const char *hash; if (!filepath.empty()) { - hash = shader_load_filepath(filepath); + hash = osl_manager->shader_load_filepath(filepath); } else { - hash = shader_test_loaded(bytecode_hash); + hash = osl_manager->shader_test_loaded(bytecode_hash); if (!hash) - hash = shader_load_bytecode(bytecode_hash, bytecode); + hash = osl_manager->shader_load_bytecode(bytecode_hash, bytecode); } if (!hash) { return NULL; } - OSLShaderInfo *info = shader_loaded_info(hash); + OSLShaderInfo *info = osl_manager->shader_loaded_info(hash); /* count number of inputs */ size_t num_inputs = 0; @@ -755,16 +760,14 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) else if (current_type == SHADER_TYPE_VOLUME) { if (node->has_spatial_varying()) current_shader->has_volume_spatial_varying = true; + if (node->has_attribute_dependency()) + current_shader->has_volume_attribute_dependency = true; } if (node->has_object_dependency()) { current_shader->has_object_dependency = true; } - if (node->has_attribute_dependency()) { - 
current_shader->has_attribute_dependency = true; - } - if (node->has_integrator_dependency()) { current_shader->has_integrator_dependency = true; } @@ -1138,8 +1141,8 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader) shader->has_displacement = false; shader->has_surface_spatial_varying = false; shader->has_volume_spatial_varying = false; + shader->has_volume_attribute_dependency = false; shader->has_object_dependency = false; - shader->has_attribute_dependency = false; shader->has_integrator_dependency = false; /* generate surface shader */ diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h index 62cbfebf7eb..4dd9f6630f2 100644 --- a/intern/cycles/render/osl.h +++ b/intern/cycles/render/osl.h @@ -93,9 +93,10 @@ class OSLShaderManager : public ShaderManager { OSLShaderInfo *shader_loaded_info(const string &hash); /* create OSL node using OSLQuery */ - OSLNode *osl_node(const std::string &filepath, - const std::string &bytecode_hash = "", - const std::string &bytecode = ""); + static OSLNode *osl_node(ShaderManager *manager, + const std::string &filepath, + const std::string &bytecode_hash = "", + const std::string &bytecode = ""); protected: void texture_system_init(); diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp index 8335404b197..ec9276eff86 100644 --- a/intern/cycles/render/particles.cpp +++ b/intern/cycles/render/particles.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "device/device.h" #include "render/particles.h" +#include "device/device.h" #include "render/scene.h" #include "util/util_foreach.h" diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 1e75fa0f99b..f5b68d5a4fe 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -16,11 +16,11 @@ #include <stdlib.h> +#include "device/device.h" #include "render/background.h" #include "render/bake.h" #include "render/camera.h" #include "render/curves.h" -#include "device/device.h" #include "render/film.h" #include "render/integrator.h" #include "render/light.h" @@ -41,50 +41,59 @@ CCL_NAMESPACE_BEGIN DeviceScene::DeviceScene(Device *device) - : bvh_nodes(device, "__bvh_nodes", MEM_TEXTURE), - bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_TEXTURE), - object_node(device, "__object_node", MEM_TEXTURE), - prim_tri_index(device, "__prim_tri_index", MEM_TEXTURE), - prim_tri_verts(device, "__prim_tri_verts", MEM_TEXTURE), - prim_type(device, "__prim_type", MEM_TEXTURE), - prim_visibility(device, "__prim_visibility", MEM_TEXTURE), - prim_index(device, "__prim_index", MEM_TEXTURE), - prim_object(device, "__prim_object", MEM_TEXTURE), - prim_time(device, "__prim_time", MEM_TEXTURE), - tri_shader(device, "__tri_shader", MEM_TEXTURE), - tri_vnormal(device, "__tri_vnormal", MEM_TEXTURE), - tri_vindex(device, "__tri_vindex", MEM_TEXTURE), - tri_patch(device, "__tri_patch", MEM_TEXTURE), - tri_patch_uv(device, "__tri_patch_uv", MEM_TEXTURE), - curves(device, "__curves", MEM_TEXTURE), - curve_keys(device, "__curve_keys", MEM_TEXTURE), - patches(device, "__patches", MEM_TEXTURE), - objects(device, "__objects", MEM_TEXTURE), - object_motion_pass(device, "__object_motion_pass", MEM_TEXTURE), - object_motion(device, "__object_motion", MEM_TEXTURE), - object_flag(device, "__object_flag", MEM_TEXTURE), - camera_motion(device, "__camera_motion", MEM_TEXTURE), - attributes_map(device, "__attributes_map", MEM_TEXTURE), - 
attributes_float(device, "__attributes_float", MEM_TEXTURE), - attributes_float2(device, "__attributes_float2", MEM_TEXTURE), - attributes_float3(device, "__attributes_float3", MEM_TEXTURE), - attributes_uchar4(device, "__attributes_uchar4", MEM_TEXTURE), - light_distribution(device, "__light_distribution", MEM_TEXTURE), - lights(device, "__lights", MEM_TEXTURE), - light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_TEXTURE), - light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE), - particles(device, "__particles", MEM_TEXTURE), - svm_nodes(device, "__svm_nodes", MEM_TEXTURE), - shaders(device, "__shaders", MEM_TEXTURE), - lookup_table(device, "__lookup_table", MEM_TEXTURE), - sobol_directions(device, "__sobol_directions", MEM_TEXTURE), - ies_lights(device, "__ies", MEM_TEXTURE) + : bvh_nodes(device, "__bvh_nodes", MEM_GLOBAL), + bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_GLOBAL), + object_node(device, "__object_node", MEM_GLOBAL), + prim_tri_index(device, "__prim_tri_index", MEM_GLOBAL), + prim_tri_verts(device, "__prim_tri_verts", MEM_GLOBAL), + prim_type(device, "__prim_type", MEM_GLOBAL), + prim_visibility(device, "__prim_visibility", MEM_GLOBAL), + prim_index(device, "__prim_index", MEM_GLOBAL), + prim_object(device, "__prim_object", MEM_GLOBAL), + prim_time(device, "__prim_time", MEM_GLOBAL), + tri_shader(device, "__tri_shader", MEM_GLOBAL), + tri_vnormal(device, "__tri_vnormal", MEM_GLOBAL), + tri_vindex(device, "__tri_vindex", MEM_GLOBAL), + tri_patch(device, "__tri_patch", MEM_GLOBAL), + tri_patch_uv(device, "__tri_patch_uv", MEM_GLOBAL), + curves(device, "__curves", MEM_GLOBAL), + curve_keys(device, "__curve_keys", MEM_GLOBAL), + patches(device, "__patches", MEM_GLOBAL), + objects(device, "__objects", MEM_GLOBAL), + object_motion_pass(device, "__object_motion_pass", MEM_GLOBAL), + object_motion(device, "__object_motion", MEM_GLOBAL), + object_flag(device, "__object_flag", MEM_GLOBAL), + 
object_volume_step(device, "__object_volume_step", MEM_GLOBAL), + camera_motion(device, "__camera_motion", MEM_GLOBAL), + attributes_map(device, "__attributes_map", MEM_GLOBAL), + attributes_float(device, "__attributes_float", MEM_GLOBAL), + attributes_float2(device, "__attributes_float2", MEM_GLOBAL), + attributes_float3(device, "__attributes_float3", MEM_GLOBAL), + attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL), + light_distribution(device, "__light_distribution", MEM_GLOBAL), + lights(device, "__lights", MEM_GLOBAL), + light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_GLOBAL), + light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_GLOBAL), + particles(device, "__particles", MEM_GLOBAL), + svm_nodes(device, "__svm_nodes", MEM_GLOBAL), + shaders(device, "__shaders", MEM_GLOBAL), + lookup_table(device, "__lookup_table", MEM_GLOBAL), + sample_pattern_lut(device, "__sample_pattern_lut", MEM_GLOBAL), + ies_lights(device, "__ies", MEM_GLOBAL) { memset((void *)&data, 0, sizeof(data)); } Scene::Scene(const SceneParams ¶ms_, Device *device) - : name("Scene"), device(device), dscene(device), params(params_) + : name("Scene"), + default_surface(NULL), + default_volume(NULL), + default_light(NULL), + default_background(NULL), + default_empty(NULL), + device(device), + dscene(device), + params(params_) { memset((void *)&dscene.data, 0, sizeof(dscene.data)); @@ -94,7 +103,7 @@ Scene::Scene(const SceneParams ¶ms_, Device *device) film = new Film(); background = new Background(); light_manager = new LightManager(); - mesh_manager = new MeshManager(); + geometry_manager = new GeometryManager(); object_manager = new ObjectManager(); integrator = new Integrator(); image_manager = new ImageManager(device->info); @@ -104,9 +113,11 @@ Scene::Scene(const SceneParams ¶ms_, Device *device) /* OSL only works on the CPU */ if (device->info.has_osl) - shader_manager = ShaderManager::create(this, params.shadingsystem); + 
shader_manager = ShaderManager::create(params.shadingsystem); else - shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM); + shader_manager = ShaderManager::create(SHADINGSYSTEM_SVM); + + shader_manager->add_default(this); } Scene::~Scene() @@ -118,8 +129,8 @@ void Scene::free_memory(bool final) { foreach (Shader *s, shaders) delete s; - foreach (Mesh *m, meshes) - delete m; + foreach (Geometry *g, geometry) + delete g; foreach (Object *o, objects) delete o; foreach (Light *l, lights) @@ -128,7 +139,7 @@ void Scene::free_memory(bool final) delete p; shaders.clear(); - meshes.clear(); + geometry.clear(); objects.clear(); lights.clear(); particle_systems.clear(); @@ -140,7 +151,7 @@ void Scene::free_memory(bool final) integrator->device_free(device, &dscene); object_manager->device_free(device, &dscene); - mesh_manager->device_free(device, &dscene); + geometry_manager->device_free(device, &dscene); shader_manager->device_free(device, &dscene, this); light_manager->device_free(device, &dscene); @@ -165,7 +176,7 @@ void Scene::free_memory(bool final) delete background; delete integrator; delete object_manager; - delete mesh_manager; + delete geometry_manager; delete shader_manager; delete light_manager; delete particle_system_manager; @@ -211,7 +222,7 @@ void Scene::device_update(Device *device_, Progress &progress) if (progress.get_cancel() || device->have_error()) return; - mesh_manager->device_update_preprocess(device, this, progress); + geometry_manager->device_update_preprocess(device, this, progress); if (progress.get_cancel() || device->have_error()) return; @@ -235,7 +246,7 @@ void Scene::device_update(Device *device_, Progress &progress) return; progress.set_status("Updating Meshes"); - mesh_manager->device_update(device, &dscene, this, progress); + geometry_manager->device_update(device, &dscene, this, progress); if (progress.get_cancel() || device->have_error()) return; @@ -356,8 +367,8 @@ bool Scene::need_update() bool Scene::need_data_update() { 
return (background->need_update || image_manager->need_update || object_manager->need_update || - mesh_manager->need_update || light_manager->need_update || lookup_tables->need_update || - integrator->need_update || shader_manager->need_update || + geometry_manager->need_update || light_manager->need_update || + lookup_tables->need_update || integrator->need_update || shader_manager->need_update || particle_system_manager->need_update || curve_system_manager->need_update || bake_manager->need_update || film->need_update); } @@ -379,7 +390,7 @@ void Scene::reset() background->tag_update(this); integrator->tag_update(this); object_manager->tag_update(this); - mesh_manager->tag_update(this); + geometry_manager->tag_update(this); light_manager->tag_update(this); particle_system_manager->tag_update(this); curve_system_manager->tag_update(this); @@ -392,7 +403,7 @@ void Scene::device_free() void Scene::collect_statistics(RenderStats *stats) { - mesh_manager->collect_statistics(this, stats); + geometry_manager->collect_statistics(this, stats); image_manager->collect_statistics(stats); } diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index f99510d2d42..6b10a901d7b 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -44,8 +44,8 @@ class Integrator; class Light; class LightManager; class LookupTables; -class Mesh; -class MeshManager; +class Geometry; +class GeometryManager; class Object; class ObjectManager; class ParticleSystemManager; @@ -91,6 +91,7 @@ class DeviceScene { device_vector<Transform> object_motion_pass; device_vector<DecomposedTransform> object_motion; device_vector<uint> object_flag; + device_vector<float> object_volume_step; /* cameras */ device_vector<DecomposedTransform> camera_motion; @@ -119,7 +120,7 @@ class DeviceScene { device_vector<float> lookup_table; /* integrator */ - device_vector<uint> sobol_directions; + device_vector<uint> sample_pattern_lut; /* ies lights */ device_vector<float> 
ies_lights; @@ -213,7 +214,7 @@ class Scene { /* data lists */ vector<Object *> objects; - vector<Mesh *> meshes; + vector<Geometry *> geometry; vector<Shader *> shaders; vector<Light *> lights; vector<ParticleSystem *> particle_systems; @@ -222,7 +223,7 @@ class Scene { ImageManager *image_manager; LightManager *light_manager; ShaderManager *shader_manager; - MeshManager *mesh_manager; + GeometryManager *geometry_manager; ObjectManager *object_manager; ParticleSystemManager *particle_system_manager; CurveSystemManager *curve_system_manager; @@ -230,6 +231,7 @@ class Scene { /* default shaders */ Shader *default_surface; + Shader *default_volume; Shader *default_light; Shader *default_background; Shader *default_empty; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index c77a20787f5..b1b30979b0e 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -14,12 +14,13 @@ * limitations under the License. */ -#include <string.h> #include <limits.h> +#include <string.h> +#include "device/device.h" +#include "render/bake.h" #include "render/buffers.h" #include "render/camera.h" -#include "device/device.h" #include "render/graph.h" #include "render/integrator.h" #include "render/light.h" @@ -27,7 +28,6 @@ #include "render/object.h" #include "render/scene.h" #include "render/session.h" -#include "render/bake.h" #include "util/util_foreach.h" #include "util/util_function.h" @@ -183,7 +183,8 @@ bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_param if (gpu_draw_ready) { /* then verify the buffers have the expected size, so we don't * draw previous results in a resized window */ - if (!buffer_params.modified(display->params)) { + if (buffer_params.width == display->params.width && + buffer_params.height == display->params.height) { /* for CUDA we need to do tone-mapping still, since we can * only access GL buffers from the main thread. 
*/ if (gpu_need_display_buffer_update) { @@ -211,6 +212,7 @@ void Session::run_gpu() reset_time = time_dt(); last_update_time = time_dt(); + last_display_time = last_update_time; progress.set_render_start_time(); @@ -291,11 +293,15 @@ void Session::run_gpu() * reset and draw in between */ thread_scoped_lock buffers_lock(buffers_mutex); + /* avoid excessive denoising in viewport after reaching a certain amount of samples */ + bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 || + (time_dt() - last_display_time) >= params.progressive_update_timeout; + /* update status and timing */ update_status_time(); /* render */ - render(); + render(need_denoise); device->task_wait(); @@ -305,7 +311,7 @@ void Session::run_gpu() /* update status and timing */ update_status_time(); - gpu_need_display_buffer_update = true; + gpu_need_display_buffer_update = need_denoise || !params.run_denoising; gpu_draw_ready = true; progress.set_update(); @@ -359,7 +365,8 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param if (display->draw_ready()) { /* then verify the buffers have the expected size, so we don't * draw previous results in a resized window */ - if (!buffer_params.modified(display->params)) { + if (buffer_params.width == display->params.width && + buffer_params.height == display->params.height) { display->draw(device, draw_params); if (display_outdated && (time_dt() - reset_time) > params.text_timeout) @@ -372,7 +379,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param return false; } -bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) +bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types) { if (progress.get_cancel()) { if (params.progressive_refine == false) { @@ -387,8 +394,14 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) Tile *tile; int device_num = device->device_number(tile_device); - if 
(!tile_manager.next_tile(tile, device_num)) + while (!tile_manager.next_tile(tile, device_num, tile_types)) { + /* Wait for denoising tiles to become available */ + if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) { + denoising_cond.wait(tile_lock); + continue; + } return false; + } /* fill render tile */ rtile.x = tile_manager.state.buffer.full_x + tile->x; @@ -399,7 +412,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) rtile.num_samples = tile_manager.state.num_samples; rtile.resolution = tile_manager.state.resolution_divider; rtile.tile_index = tile->index; - rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE; + rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE; tile_lock.unlock(); @@ -413,6 +426,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) device->map_tile(tile_device, rtile); + /* Reset copy state, since buffer contents change after the tile was acquired */ + buffers->map_neighbor_copied = false; + return true; } @@ -429,6 +445,8 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) tile->buffers->reset(buffer_params); } + tile->buffers->map_neighbor_copied = false; + tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); rtile.buffer = tile->buffers->buffer.device_pointer; @@ -484,45 +502,75 @@ void Session::release_tile(RenderTile &rtile) } update_status_time(); + + /* Notify denoising thread that a tile was finished. 
*/ + denoising_cond.notify_all(); } void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) { thread_scoped_lock tile_lock(tile_mutex); - int center_idx = tiles[4].tile_index; - assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); - BufferParams buffer_params = tile_manager.params; - int4 image_region = make_int4(buffer_params.full_x, - buffer_params.full_y, - buffer_params.full_x + buffer_params.width, - buffer_params.full_y + buffer_params.height); - - for (int dy = -1, i = 0; dy <= 1; dy++) { - for (int dx = -1; dx <= 1; dx++, i++) { - int px = tiles[4].x + dx * params.tile_size.x; - int py = tiles[4].y + dy * params.tile_size.y; - if (px >= image_region.x && py >= image_region.y && px < image_region.z && - py < image_region.w) { - int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx; - Tile *tile = &tile_manager.state.tiles[tile_index]; - assert(tile->buffers); - - tiles[i].buffer = tile->buffers->buffer.device_pointer; - tiles[i].x = tile_manager.state.buffer.full_x + tile->x; - tiles[i].y = tile_manager.state.buffer.full_y + tile->y; - tiles[i].w = tile->w; - tiles[i].h = tile->h; - tiles[i].buffers = tile->buffers; - - tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); - } - else { - tiles[i].buffer = (device_ptr)NULL; - tiles[i].buffers = NULL; - tiles[i].x = clamp(px, image_region.x, image_region.z); - tiles[i].y = clamp(py, image_region.y, image_region.w); - tiles[i].w = tiles[i].h = 0; + const int4 image_region = make_int4( + tile_manager.state.buffer.full_x, + tile_manager.state.buffer.full_y, + tile_manager.state.buffer.full_x + tile_manager.state.buffer.width, + tile_manager.state.buffer.full_y + tile_manager.state.buffer.height); + + if (!tile_manager.schedule_denoising) { + /* Fix up tile slices with overlap. 
*/ + if (tile_manager.slice_overlap != 0) { + int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y); + tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y; + tiles[4].y = y; + } + + /* Tiles are not being denoised individually, which means the entire image is processed. */ + tiles[3].x = tiles[4].x; + tiles[1].y = tiles[4].y; + tiles[5].x = tiles[4].x + tiles[4].w; + tiles[7].y = tiles[4].y + tiles[4].h; + } + else { + int center_idx = tiles[4].tile_index; + assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); + + for (int dy = -1, i = 0; dy <= 1; dy++) { + for (int dx = -1; dx <= 1; dx++, i++) { + int nindex = tile_manager.get_neighbor_index(center_idx, i); + if (nindex >= 0) { + Tile *tile = &tile_manager.state.tiles[nindex]; + + tiles[i].x = image_region.x + tile->x; + tiles[i].y = image_region.y + tile->y; + tiles[i].w = tile->w; + tiles[i].h = tile->h; + + if (buffers) { + tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride); + + tiles[i].buffer = buffers->buffer.device_pointer; + tiles[i].buffers = buffers; + } + else { + assert(tile->buffers); + tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); + + tiles[i].buffer = tile->buffers->buffer.device_pointer; + tiles[i].buffers = tile->buffers; + } + } + else { + int px = tiles[4].x + dx * params.tile_size.x; + int py = tiles[4].y + dy * params.tile_size.y; + + tiles[i].x = clamp(px, image_region.x, image_region.z); + tiles[i].y = clamp(py, image_region.y, image_region.w); + tiles[i].w = tiles[i].h = 0; + + tiles[i].buffer = (device_ptr)NULL; + tiles[i].buffers = NULL; + } } } } @@ -545,6 +593,7 @@ void Session::run_cpu() bool tiles_written = false; last_update_time = time_dt(); + last_display_time = last_update_time; { /* reset once to start */ @@ -575,7 +624,7 @@ void Session::run_cpu() } /* Don't go in pause mode when preview kernels are used - * When feature kernels become available the 
session will be resetted. */ + * When feature kernels become available the session will be reset. */ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { time_sleep(0.1); } @@ -620,11 +669,6 @@ void Session::run_cpu() } if (!no_tiles) { - /* buffers mutex is locked entirely while rendering each - * sample, and released/reacquired on each iteration to allow - * reset and draw in between */ - thread_scoped_lock buffers_lock(buffers_mutex); - /* update scene */ scoped_timer update_timer; if (update_scene()) { @@ -638,17 +682,26 @@ void Session::run_cpu() if (progress.get_cancel()) break; + /* buffers mutex is locked entirely while rendering each + * sample, and released/reacquired on each iteration to allow + * reset and draw in between */ + thread_scoped_lock buffers_lock(buffers_mutex); + + /* avoid excessive denoising in viewport after reaching a certain amount of samples */ + bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 || + (time_dt() - last_display_time) >= params.progressive_update_timeout; + /* update status and timing */ update_status_time(); /* render */ - render(); + render(need_denoise); /* update status and timing */ update_status_time(); if (!params.background) - need_copy_to_display_buffer = true; + need_copy_to_display_buffer = need_denoise || !params.run_denoising; if (!device->error_message().empty()) progress.set_error(device->error_message()); @@ -701,23 +754,26 @@ DeviceRequestedFeatures Session::get_requested_device_features() requested_features.use_object_motion = false; requested_features.use_camera_motion = use_motion && scene->camera->use_motion(); foreach (Object *object, scene->objects) { - Mesh *mesh = object->mesh; - if (mesh->num_curves()) { - requested_features.use_hair = true; - } + Geometry *geom = object->geometry; if (use_motion) { - requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur; - requested_features.use_camera_motion |= 
mesh->use_motion_blur; + requested_features.use_object_motion |= object->use_motion() | geom->use_motion_blur; + requested_features.use_camera_motion |= geom->use_motion_blur; } -#ifdef WITH_OPENSUBDIV - if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { - requested_features.use_patch_evaluation = true; - } -#endif if (object->is_shadow_catcher) { requested_features.use_shadow_tricks = true; } - requested_features.use_true_displacement |= mesh->has_true_displacement(); + if (geom->type == Geometry::MESH) { + Mesh *mesh = static_cast<Mesh *>(geom); +#ifdef WITH_OPENSUBDIV + if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { + requested_features.use_patch_evaluation = true; + } +#endif + requested_features.use_true_displacement |= mesh->has_true_displacement(); + } + else if (geom->type == Geometry::HAIR) { + requested_features.use_hair = true; + } } requested_features.use_background_light = scene->light_manager->has_background_light(scene); @@ -842,9 +898,6 @@ void Session::set_samples(int samples) params.samples = samples; tile_manager.set_samples(samples); - { - thread_scoped_lock pause_lock(pause_mutex); - } pause_cond.notify_all(); } } @@ -866,6 +919,29 @@ void Session::set_pause(bool pause_) pause_cond.notify_all(); } +void Session::set_denoising(bool denoising, bool optix_denoising) +{ + /* Lock buffers so no denoising operation is triggered while the settings are changed here. */ + thread_scoped_lock buffers_lock(buffers_mutex); + + params.run_denoising = denoising; + params.full_denoising = !optix_denoising; + params.optix_denoising = optix_denoising; + + // TODO(pmours): Query the required overlap value for denoising from the device? + tile_manager.slice_overlap = denoising && !params.background ? 
64 : 0; + tile_manager.schedule_denoising = denoising && !buffers; +} + +void Session::set_denoising_start_sample(int sample) +{ + if (sample != params.denoising_start_sample) { + params.denoising_start_sample = sample; + + pause_cond.notify_all(); + } +} + void Session::wait() { if (session_thread) { @@ -900,7 +976,7 @@ bool Session::update_scene() Integrator *integrator = scene->integrator; BakeManager *bake_manager = scene->bake_manager; - if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) { + if (integrator->sampling_pattern != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) { int aa_samples = tile_manager.num_samples; if (aa_samples != integrator->aa_samples) { @@ -911,7 +987,8 @@ bool Session::update_scene() /* update scene */ if (scene->need_update()) { - bool new_kernels_needed = load_kernels(false); + /* Updated used shader tag so we know which features are need for the kernel. */ + scene->shader_manager->update_shaders_used(scene); /* Update max_closures. */ KernelIntegrator *kintegrator = &scene->dscene.data.integrator; @@ -923,6 +1000,9 @@ bool Session::update_scene() kintegrator->max_closures = MAX_CLOSURE; } + /* Load render kernels, before device update where we upload data to the GPU. */ + bool new_kernels_needed = load_kernels(false); + progress.set_status("Updating Scene"); MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); @@ -1003,17 +1083,21 @@ void Session::update_status_time(bool show_pause, bool show_done) progress.set_status(status, substatus); } -void Session::render() +void Session::render(bool with_denoising) { - /* Clear buffers. */ if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) { + /* Clear buffers. */ buffers->zero(); } + if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) { + return; /* Avoid empty launches. */ + } + /* Add path trace task. 
*/ DeviceTask task(DeviceTask::RENDER); - task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2); + task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3); task.release_tile = function_bind(&Session::release_tile, this, _1); task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); @@ -1022,13 +1106,37 @@ void Session::render() task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); task.need_finish_queue = params.progressive_refine; task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; - task.requested_tile_size = params.tile_size; - task.passes_size = tile_manager.params.get_passes_size(); - if (params.run_denoising) { + task.adaptive_sampling.use = (scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ) && + scene->dscene.data.film.pass_adaptive_aux_buffer; + task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples; + + /* Acquire render tiles by default. */ + task.tile_types = RenderTile::PATH_TRACE; + + with_denoising = params.run_denoising && with_denoising; + if (with_denoising) { + /* Do not denoise viewport until the sample at which denoising should start is reached. */ + if (!params.background && tile_manager.state.sample < params.denoising_start_sample) { + with_denoising = false; + } + + /* Cannot denoise with resolution divider and separate denoising devices. + * It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on the full buffer + * dimensions and not the scaled ones). */ + if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) { + with_denoising = false; + } + + /* It can happen that denoising was already enabled, but the scene still needs an update. 
*/ + if (scene->film->need_update || !scene->film->denoising_data_offset) { + with_denoising = false; + } + } + + if (with_denoising) { task.denoising = params.denoising; - assert(!scene->film->need_update); task.pass_stride = scene->film->pass_stride; task.target_pass_stride = task.pass_stride; task.pass_denoising_data = scene->film->denoising_data_offset; @@ -1038,6 +1146,30 @@ void Session::render() task.denoising_do_filter = params.full_denoising; task.denoising_use_optix = params.optix_denoising; task.denoising_write_passes = params.write_denoising_passes; + + if (tile_manager.schedule_denoising) { + /* Acquire denoising tiles during rendering. */ + task.tile_types |= RenderTile::DENOISE; + } + else { + assert(buffers); + + /* Schedule rendering and wait for it to finish. */ + device->task_add(task); + device->task_wait(); + + /* Then run denoising on the whole image at once. */ + task.type = DeviceTask::DENOISE_BUFFER; + task.x = tile_manager.state.buffer.full_x; + task.y = tile_manager.state.buffer.full_y; + task.w = tile_manager.state.buffer.width; + task.h = tile_manager.state.buffer.height; + task.buffer = buffers->buffer.device_pointer; + task.sample = tile_manager.state.sample; + task.num_samples = tile_manager.state.num_samples; + tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); + task.buffers = buffers; + } } device->task_add(task); @@ -1064,6 +1196,8 @@ void Session::copy_to_display_buffer(int sample) /* set display to new size */ display->draw_set(task.w, task.h); + + last_display_time = time_dt(); } display_outdated = false; @@ -1141,8 +1275,11 @@ int Session::get_max_closure_count() int max_closures = 0; for (int i = 0; i < scene->shaders.size(); i++) { - int num_closures = scene->shaders[i]->graph->get_num_closures(); - max_closures = max(max_closures, num_closures); + Shader *shader = scene->shaders[i]; + if (shader->used) { + int num_closures = shader->graph->get_num_closures(); + max_closures = max(max_closures, 
num_closures); + } } max_closure_global = max(max_closure_global, max_closures); diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index ec465601541..61970d87e9c 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -17,8 +17,8 @@ #ifndef __SESSION_H__ #define __SESSION_H__ -#include "render/buffers.h" #include "device/device.h" +#include "render/buffers.h" #include "render/shader.h" #include "render/stats.h" #include "render/tile.h" @@ -53,8 +53,10 @@ class SessionParams { int2 tile_size; TileOrder tile_order; int start_resolution; + int denoising_start_sample; int pixel_size; int threads; + bool adaptive_sampling; bool use_profiling; @@ -85,8 +87,10 @@ class SessionParams { samples = 1024; tile_size = make_int2(64, 64); start_resolution = INT_MAX; + denoising_start_sample = 0; pixel_size = 1; threads = 0; + adaptive_sampling = false; use_profiling = false; @@ -109,11 +113,13 @@ class SessionParams { bool modified(const SessionParams ¶ms) { return !(device == params.device && background == params.background && - progressive_refine == params.progressive_refine - /* && samples == params.samples */ - && progressive == params.progressive && experimental == params.experimental && + progressive_refine == params.progressive_refine && + /* samples == params.samples && denoising_start_sample == + params.denoising_start_sample && */ + progressive == params.progressive && experimental == params.experimental && tile_size == params.tile_size && start_resolution == params.start_resolution && pixel_size == params.pixel_size && threads == params.threads && + adaptive_sampling == params.adaptive_sampling && use_profiling == params.use_profiling && display_buffer_linear == params.display_buffer_linear && cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout && @@ -152,8 +158,10 @@ class Session { bool ready_to_reset(); void reset(BufferParams ¶ms, int samples); - void set_samples(int samples); void 
set_pause(bool pause); + void set_samples(int samples); + void set_denoising(bool denoising, bool optix_denoising); + void set_denoising_start_sample(int sample); bool update_scene(); bool load_kernels(bool lock_scene = true); @@ -178,8 +186,9 @@ class Session { void update_status_time(bool show_pause = false, bool show_done = false); + void render(bool with_denoising); void copy_to_display_buffer(int sample); - void render(); + void reset_(BufferParams ¶ms, int samples); void run_cpu(); @@ -190,7 +199,7 @@ class Session { bool draw_gpu(BufferParams ¶ms, DeviceDrawParams &draw_params); void reset_gpu(BufferParams ¶ms, int samples); - bool acquire_tile(Device *tile_device, RenderTile &tile); + bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types); void update_tile_sample(RenderTile &tile); void release_tile(RenderTile &tile); @@ -213,14 +222,16 @@ class Session { thread_mutex tile_mutex; thread_mutex buffers_mutex; thread_mutex display_mutex; + thread_condition_variable denoising_cond; bool kernels_loaded; DeviceRequestedFeatures loaded_kernel_features; double reset_time; + double last_update_time; + double last_display_time; /* progressive refine */ - double last_update_time; bool update_progressive_refine(bool cancel); DeviceRequestedFeatures get_requested_device_features(); diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 661208c6463..747fc58f81a 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -168,7 +168,7 @@ NODE_DEFINE(Shader) SOCKET_ENUM(volume_sampling_method, "Volume Sampling Method", volume_sampling_method_enum, - VOLUME_SAMPLING_DISTANCE); + VOLUME_SAMPLING_MULTIPLE_IMPORTANCE); static NodeEnum volume_interpolation_method_enum; volume_interpolation_method_enum.insert("linear", VOLUME_INTERPOLATION_LINEAR); @@ -178,6 +178,8 @@ NODE_DEFINE(Shader) volume_interpolation_method_enum, VOLUME_INTERPOLATION_LINEAR); + SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 
1.0f); + static NodeEnum displacement_method_enum; displacement_method_enum.insert("bump", DISPLACE_BUMP); displacement_method_enum.insert("true", DISPLACE_TRUE); @@ -203,10 +205,11 @@ Shader::Shader() : Node(node_type) has_bssrdf_bump = false; has_surface_spatial_varying = false; has_volume_spatial_varying = false; + has_volume_attribute_dependency = false; has_object_dependency = false; - has_attribute_dependency = false; has_integrator_dependency = false; has_volume_connected = false; + prev_volume_step_rate = 0.0f; displacement_method = DISPLACE_BUMP; @@ -214,7 +217,7 @@ Shader::Shader() : Node(node_type) used = false; need_update = true; - need_update_mesh = true; + need_update_geometry = true; need_sync_object = false; } @@ -288,7 +291,7 @@ void Shader::set_graph(ShaderGraph *graph_) const char *new_hash = (graph_) ? graph_->displacement_hash.c_str() : ""; if (strcmp(old_hash, new_hash) != 0) { - need_update_mesh = true; + need_update_geometry = true; } } @@ -347,15 +350,16 @@ void Shader::tag_update(Scene *scene) } /* compare if the attributes changed, mesh manager will check - * need_update_mesh, update the relevant meshes and clear it. */ + * need_update_geometry, update the relevant meshes and clear it. 
*/ if (attributes.modified(prev_attributes)) { - need_update_mesh = true; - scene->mesh_manager->need_update = true; + need_update_geometry = true; + scene->geometry_manager->need_update = true; } - if (has_volume != prev_has_volume) { - scene->mesh_manager->need_flags_update = true; + if (has_volume != prev_has_volume || volume_step_rate != prev_volume_step_rate) { + scene->geometry_manager->need_flags_update = true; scene->object_manager->need_flags_update = true; + prev_volume_step_rate = volume_step_rate; } } @@ -415,7 +419,7 @@ ShaderManager::~ShaderManager() { } -ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem) +ShaderManager *ShaderManager::create(int shadingsystem) { ShaderManager *manager; @@ -431,8 +435,6 @@ ShaderManager *ShaderManager::create(Scene *scene, int shadingsystem) manager = new SVMShaderManager(); } - add_default(scene); - return manager; } @@ -471,8 +473,12 @@ int ShaderManager::get_shader_id(Shader *shader, bool smooth) return id; } -void ShaderManager::device_update_shaders_used(Scene *scene) +void ShaderManager::update_shaders_used(Scene *scene) { + if (!need_update) { + return; + } + /* figure out which shaders are in use, so SVM/OSL can skip compiling them * for speed and avoid loading image textures into memory */ uint id = 0; @@ -489,8 +495,8 @@ void ShaderManager::device_update_shaders_used(Scene *scene) if (scene->background->shader) scene->background->shader->used = true; - foreach (Mesh *mesh, scene->meshes) - foreach (Shader *shader, mesh->used_shaders) + foreach (Geometry *geom, scene->geometry) + foreach (Shader *shader, geom->used_shaders) shader->used = true; foreach (Light *light, scene->lights) @@ -531,10 +537,12 @@ void ShaderManager::device_update_common(Device *device, /* in this case we can assume transparent surface */ if (shader->has_volume_connected && !shader->has_surface) flag |= SD_HAS_ONLY_VOLUME; - if (shader->heterogeneous_volume && shader->has_volume_spatial_varying) - flag |= 
SD_HETEROGENEOUS_VOLUME; - if (shader->has_attribute_dependency) - flag |= SD_NEED_ATTRIBUTES; + if (shader->has_volume) { + if (shader->heterogeneous_volume && shader->has_volume_spatial_varying) + flag |= SD_HETEROGENEOUS_VOLUME; + } + if (shader->has_volume_attribute_dependency) + flag |= SD_NEED_VOLUME_ATTRIBUTES; if (shader->has_bssrdf_bump) flag |= SD_HAS_BSSRDF_BUMP; if (device->info.has_volume_decoupled) { @@ -623,9 +631,27 @@ void ShaderManager::add_default(Scene *scene) Shader *shader = new Shader(); shader->name = "default_surface"; - shader->graph = graph; + shader->set_graph(graph); scene->shaders.push_back(shader); scene->default_surface = shader; + shader->tag_update(scene); + } + + /* default volume */ + { + ShaderGraph *graph = new ShaderGraph(); + + PrincipledVolumeNode *principled = new PrincipledVolumeNode(); + graph->add(principled); + + graph->connect(principled->output("Volume"), graph->output()->input("Volume")); + + Shader *shader = new Shader(); + shader->name = "default_volume"; + shader->set_graph(graph); + scene->shaders.push_back(shader); + scene->default_volume = shader; + shader->tag_update(scene); } /* default light */ @@ -641,9 +667,10 @@ void ShaderManager::add_default(Scene *scene) Shader *shader = new Shader(); shader->name = "default_light"; - shader->graph = graph; + shader->set_graph(graph); scene->shaders.push_back(shader); scene->default_light = shader; + shader->tag_update(scene); } /* default background */ @@ -652,9 +679,10 @@ void ShaderManager::add_default(Scene *scene) Shader *shader = new Shader(); shader->name = "default_background"; - shader->graph = graph; + shader->set_graph(graph); scene->shaders.push_back(shader); scene->default_background = shader; + shader->tag_update(scene); } /* default empty */ @@ -663,9 +691,10 @@ void ShaderManager::add_default(Scene *scene) Shader *shader = new Shader(); shader->name = "default_empty"; - shader->graph = graph; + shader->set_graph(graph); scene->shaders.push_back(shader); 
scene->default_empty = shader; + shader->tag_update(scene); } } @@ -704,6 +733,10 @@ void ShaderManager::get_requested_features(Scene *scene, requested_features->nodes_features = 0; for (int i = 0; i < scene->shaders.size(); i++) { Shader *shader = scene->shaders[i]; + if (!shader->used) { + continue; + } + /* Gather requested features from all the nodes from the graph nodes. */ get_requested_graph_features(shader->graph, requested_features); ShaderNode *output_node = shader->graph->output(); diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index f74204df355..7801fd29276 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -23,8 +23,8 @@ # include <OSL/oslexec.h> #endif -#include "render/attribute.h" #include "kernel/kernel_types.h" +#include "render/attribute.h" #include "graph/node.h" @@ -92,10 +92,12 @@ class Shader : public Node { bool heterogeneous_volume; VolumeSampling volume_sampling_method; int volume_interpolation_method; + float volume_step_rate; + float prev_volume_step_rate; /* synchronization */ bool need_update; - bool need_update_mesh; + bool need_update_geometry; bool need_sync_object; /* If the shader has only volume components, the surface is assumed to @@ -118,8 +120,8 @@ class Shader : public Node { bool has_bssrdf_bump; bool has_surface_spatial_varying; bool has_volume_spatial_varying; + bool has_volume_attribute_dependency; bool has_object_dependency; - bool has_attribute_dependency; bool has_integrator_dependency; /* displacement */ @@ -163,7 +165,7 @@ class ShaderManager { public: bool need_update; - static ShaderManager *create(Scene *scene, int shadingsystem); + static ShaderManager *create(int shadingsystem); virtual ~ShaderManager(); virtual void reset(Scene *scene) = 0; @@ -180,7 +182,6 @@ class ShaderManager { Progress &progress) = 0; virtual void device_free(Device *device, DeviceScene *dscene, Scene *scene) = 0; - void device_update_shaders_used(Scene *scene); void 
device_update_common(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); void device_free_common(Device *device, DeviceScene *dscene, Scene *scene); @@ -196,6 +197,7 @@ class ShaderManager { static void add_default(Scene *scene); /* Selective nodes compilation. */ + void update_shaders_used(Scene *scene); void get_requested_features(Scene *scene, DeviceRequestedFeatures *requested_features); static void free_memory(); diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index 7c33f6c04ae..b4858f488c3 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -25,8 +25,8 @@ #include "render/shader.h" #include "render/svm.h" -#include "util/util_logging.h" #include "util/util_foreach.h" +#include "util/util_logging.h" #include "util/util_progress.h" #include "util/util_task.h" @@ -85,9 +85,6 @@ void SVMShaderManager::device_update(Device *device, /* test if we need to update */ device_free(device, dscene, scene); - /* determine which shaders are in use */ - device_update_shaders_used(scene); - /* Build all shaders. 
*/ TaskPool task_pool; vector<array<int4>> shader_svm_nodes(num_shaders); @@ -447,16 +444,14 @@ void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet &done) else if (current_type == SHADER_TYPE_VOLUME) { if (node->has_spatial_varying()) current_shader->has_volume_spatial_varying = true; + if (node->has_attribute_dependency()) + current_shader->has_volume_attribute_dependency = true; } if (node->has_object_dependency()) { current_shader->has_object_dependency = true; } - if (node->has_attribute_dependency()) { - current_shader->has_attribute_dependency = true; - } - if (node->has_integrator_dependency()) { current_shader->has_integrator_dependency = true; } @@ -867,8 +862,8 @@ void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Sum shader->has_displacement = false; shader->has_surface_spatial_varying = false; shader->has_volume_spatial_varying = false; + shader->has_volume_attribute_dependency = false; shader->has_object_dependency = false; - shader->has_attribute_dependency = false; shader->has_integrator_dependency = false; /* generate bump shader */ diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index d88925939e3..270e05abe29 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -14,9 +14,9 @@ * limitations under the License. 
*/ +#include "render/tables.h" #include "device/device.h" #include "render/scene.h" -#include "render/tables.h" #include "util/util_logging.h" diff --git a/intern/cycles/render/tables.h b/intern/cycles/render/tables.h index 12b59bb0aeb..3ed2959ae59 100644 --- a/intern/cycles/render/tables.h +++ b/intern/cycles/render/tables.h @@ -18,6 +18,7 @@ #define __TABLES_H__ #include "util/util_list.h" +#include "util/util_vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 9ef0c695667..1480b6d1aab 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -101,6 +101,7 @@ TileManager::TileManager(bool progressive_, tile_order = tile_order_; start_resolution = start_resolution_; pixel_size = pixel_size_; + slice_overlap = 0; num_samples = num_samples_; num_devices = num_devices_; preserve_tile_device = preserve_tile_device_; @@ -201,8 +202,7 @@ int TileManager::gen_tiles(bool sliced) int image_h = max(1, params.height / resolution); int2 center = make_int2(image_w / 2, image_h / 2); - int num_logical_devices = preserve_tile_device ? num_devices : 1; - int num = min(image_h, num_logical_devices); + int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1; int slice_num = sliced ? num : 1; int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); @@ -216,7 +216,7 @@ int TileManager::gen_tiles(bool sliced) tile_list = state.render_tiles.begin(); if (tile_order == TILE_HILBERT_SPIRAL) { - assert(!sliced); + assert(!sliced && slice_overlap == 0); int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); state.tiles.resize(tile_w * tile_h); @@ -319,6 +319,12 @@ int TileManager::gen_tiles(bool sliced) int slice_h = (slice == slice_num - 1) ? 
image_h - slice * (image_h / slice_num) : image_h / slice_num; + if (slice_overlap != 0) { + int slice_y_offset = max(slice_y - slice_overlap, 0); + slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset; + slice_y = slice_y_offset; + } + int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); int tiles_per_device = divide_up(tile_w * tile_h, num); @@ -363,6 +369,7 @@ void TileManager::gen_render_tiles() { /* Regenerate just the render tiles for progressive render. */ foreach (Tile &tile, state.tiles) { + tile.state = Tile::RENDER; state.render_tiles[tile.device].push_back(tile.index); } } @@ -386,17 +393,29 @@ void TileManager::set_tiles() int TileManager::get_neighbor_index(int index, int neighbor) { - static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0}; + /* Neighbor indices: + * 0 1 2 + * 3 4 5 + * 6 7 8 + */ + static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1}; + static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1}; int resolution = state.resolution_divider; int image_w = max(1, params.width / resolution); int image_h = max(1, params.height / resolution); + + int num = min(image_h, num_devices); + int slice_num = !background ? num : 1; + int slice_h = image_h / slice_num; + int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); - int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); + int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); - int nx = state.tiles[index].x / tile_size.x + dx[neighbor], - ny = state.tiles[index].y / tile_size.y + dy[neighbor]; - if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) + /* Tiles in the state tile list are always indexed from left to right, top to bottom. 
*/ + int nx = (index % tile_w) + dx[neighbor]; + int ny = (index / tile_w) + dy[neighbor]; + if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num) return -1; return ny * state.tile_stride + nx; @@ -426,15 +445,11 @@ bool TileManager::finish_tile(int index, bool &delete_tile) { delete_tile = false; - if (progressive) { - return true; - } - switch (state.tiles[index].state) { case Tile::RENDER: { if (!schedule_denoising) { state.tiles[index].state = Tile::DONE; - delete_tile = true; + delete_tile = !progressive; return true; } state.tiles[index].state = Tile::RENDERED; @@ -457,15 +472,18 @@ bool TileManager::finish_tile(int index, bool &delete_tile) int nindex = get_neighbor_index(index, neighbor); if (check_neighbor_state(nindex, Tile::DENOISED)) { state.tiles[nindex].state = Tile::DONE; - /* It can happen that the tile just finished denoising and already can be freed here. - * However, in that case it still has to be written before deleting, so we can't delete - * it yet. */ - if (neighbor == 8) { - delete_tile = true; - } - else { - delete state.tiles[nindex].buffers; - state.tiles[nindex].buffers = NULL; + /* Do not delete finished tiles in progressive mode. */ + if (!progressive) { + /* It can happen that the tile just finished denoising and already can be freed here. + * However, in that case it still has to be written before deleting, so we can't delete + * it yet. */ + if (neighbor == 4) { + delete_tile = true; + } + else { + delete state.tiles[nindex].buffers; + state.tiles[nindex].buffers = NULL; + } } } } @@ -477,27 +495,65 @@ bool TileManager::finish_tile(int index, bool &delete_tile) } } -bool TileManager::next_tile(Tile *&tile, int device) +bool TileManager::next_tile(Tile *&tile, int device, uint tile_types) { - int logical_device = preserve_tile_device ? device : 0; + /* Preserve device if requested, unless this is a separate denoising device that just wants to + * grab any available tile. 
*/ + const bool preserve_device = preserve_tile_device && device < num_devices; - if (logical_device >= state.render_tiles.size()) - return false; + if (tile_types & RenderTile::DENOISE) { + int tile_index = -1; + int logical_device = preserve_device ? device : 0; - if (!state.denoising_tiles[logical_device].empty()) { - int idx = state.denoising_tiles[logical_device].front(); - state.denoising_tiles[logical_device].pop_front(); - tile = &state.tiles[idx]; - return true; + while (logical_device < state.denoising_tiles.size()) { + if (state.denoising_tiles[logical_device].empty()) { + if (preserve_device) { + break; + } + else { + logical_device++; + continue; + } + } + + tile_index = state.denoising_tiles[logical_device].front(); + state.denoising_tiles[logical_device].pop_front(); + break; + } + + if (tile_index >= 0) { + tile = &state.tiles[tile_index]; + return true; + } } - if (state.render_tiles[logical_device].empty()) - return false; + if (tile_types & RenderTile::PATH_TRACE) { + int tile_index = -1; + int logical_device = preserve_device ? 
device : 0; - int idx = state.render_tiles[logical_device].front(); - state.render_tiles[logical_device].pop_front(); - tile = &state.tiles[idx]; - return true; + while (logical_device < state.render_tiles.size()) { + if (state.render_tiles[logical_device].empty()) { + if (preserve_device) { + break; + } + else { + logical_device++; + continue; + } + } + + tile_index = state.render_tiles[logical_device].front(); + state.render_tiles[logical_device].pop_front(); + break; + } + + if (tile_index >= 0) { + tile = &state.tiles[tile_index]; + return true; + } + } + + return false; } bool TileManager::done() @@ -508,6 +564,16 @@ bool TileManager::done() (state.sample + state.num_samples >= end_sample); } +bool TileManager::has_tiles() +{ + foreach (Tile &tile, state.tiles) { + if (tile.state != Tile::DONE) { + return true; + } + } + return false; +} + bool TileManager::next() { if (done()) diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 017c1af0ead..9fb9c1ca782 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -89,6 +89,7 @@ class TileManager { } state; int num_samples; + int slice_overlap; TileManager(bool progressive, int num_samples, @@ -105,15 +106,19 @@ class TileManager { void reset(BufferParams ¶ms, int num_samples); void set_samples(int num_samples); bool next(); - bool next_tile(Tile *&tile, int device = 0); + bool next_tile(Tile *&tile, int device, uint tile_types); bool finish_tile(int index, bool &delete_tile); bool done(); + bool has_tiles(); void set_tile_order(TileOrder tile_order_) { tile_order = tile_order_; } + int get_neighbor_index(int index, int neighbor); + bool check_neighbor_state(int index, Tile::State state); + /* ** Sample range rendering. ** */ /* Start sample in the range. */ @@ -160,9 +165,6 @@ class TileManager { /* Generate tile list, return number of tiles. 
*/ int gen_tiles(bool sliced); void gen_render_tiles(); - - int get_neighbor_index(int index, int neighbor); - bool check_neighbor_state(int index, Tile::State state); }; CCL_NAMESPACE_END diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt index 98fcc8cd15e..6dcc7f7b3dd 100644 --- a/intern/cycles/test/CMakeLists.txt +++ b/intern/cycles/test/CMakeLists.txt @@ -82,25 +82,33 @@ list(APPEND ALL_CYCLES_LIBRARIES ${TIFF_LIBRARY} ${OPENIMAGEIO_LIBRARIES} ${OPENEXR_LIBRARIES} + ${OPENVDB_LIBRARIES} ) include_directories(${INC}) -link_directories(${OPENIMAGEIO_LIBPATH} - ${BOOST_LIBPATH} - ${PNG_LIBPATH} - ${JPEG_LIBPATH} - ${ZLIB_LIBPATH} - ${TIFF_LIBPATH} - ${OPENEXR_LIBPATH} - ${OPENCOLORIO_LIBPATH}) +link_directories( + ${OPENIMAGEIO_LIBPATH} + ${BOOST_LIBPATH} + ${PNG_LIBPATH} + ${JPEG_LIBPATH} + ${ZLIB_LIBPATH} + ${TIFF_LIBPATH} + ${OPENEXR_LIBPATH} + ${OPENCOLORIO_LIBPATH} + ${OPENVDB_LIBPATH} +) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PLATFORM_LINKFLAGS}") set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${PLATFORM_LINKFLAGS_DEBUG}") CYCLES_TEST(render_graph_finalize "${ALL_CYCLES_LIBRARIES};bf_intern_numaapi") CYCLES_TEST(util_aligned_malloc "cycles_util") -CYCLES_TEST(util_path "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}") -CYCLES_TEST(util_string "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}") -CYCLES_TEST(util_task "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES};bf_intern_numaapi") -CYCLES_TEST(util_time "cycles_util;${BOOST_LIBRARIES};${OPENIMAGEIO_LIBRARIES}") +CYCLES_TEST(util_path "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}") +CYCLES_TEST(util_string "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}") +CYCLES_TEST(util_task "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES};bf_intern_numaapi") +CYCLES_TEST(util_time "cycles_util;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}") +set_source_files_properties(util_avxf_avx_test.cpp PROPERTIES 
COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") +CYCLES_TEST(util_avxf_avx "cycles_util;bf_intern_numaapi;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}") +set_source_files_properties(util_avxf_avx2_test.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") +CYCLES_TEST(util_avxf_avx2 "cycles_util;bf_intern_numaapi;${OPENIMAGEIO_LIBRARIES};${BOOST_LIBRARIES}") diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp index ca93f8b02d0..87389ebfb16 100644 --- a/intern/cycles/test/render_graph_finalize_test.cpp +++ b/intern/cycles/test/render_graph_finalize_test.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "testing/testing.h" #include "testing/mock_log.h" +#include "testing/testing.h" #include "render/graph.h" -#include "render/scene.h" #include "render/nodes.h" +#include "render/scene.h" #include "util/util_array.h" #include "util/util_logging.h" #include "util/util_string.h" diff --git a/intern/cycles/test/util_avxf_avx2_test.cpp b/intern/cycles/test/util_avxf_avx2_test.cpp new file mode 100644 index 00000000000..9b466ddd3a0 --- /dev/null +++ b/intern/cycles/test/util_avxf_avx2_test.cpp @@ -0,0 +1,21 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#define __KERNEL_AVX2__ +#define __KERNEL_CPU__ + +#if defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# include "util_avxf_test.h" +#endif diff --git a/intern/cycles/test/util_avxf_avx_test.cpp b/intern/cycles/test/util_avxf_avx_test.cpp new file mode 100644 index 00000000000..cea67649b80 --- /dev/null +++ b/intern/cycles/test/util_avxf_avx_test.cpp @@ -0,0 +1,21 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#define __KERNEL_AVX__ +#define __KERNEL_CPU__ + +#if defined(i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# include "util_avxf_test.h" +#endif diff --git a/intern/cycles/test/util_avxf_test.h b/intern/cycles/test/util_avxf_test.h new file mode 100644 index 00000000000..d93563fdb3f --- /dev/null +++ b/intern/cycles/test/util_avxf_test.h @@ -0,0 +1,222 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "testing/testing.h" +#include "util/util_system.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +bool validate_cpu_capabilities() +{ + +#ifdef __KERNEL_AVX2__ + return system_cpu_support_avx2(); +#else +# ifdef __KERNEL_AVX__ + return system_cpu_support_avx(); +# endif +#endif +} + +#define VALIDATECPU \ + if (!validate_cpu_capabilities()) \ + return; + +#define compare_vector_scalar(a, b) \ + for (size_t index = 0; index < a.size; index++) \ + EXPECT_FLOAT_EQ(a[index], b); + +#define compare_vector_vector(a, b) \ + for (size_t index = 0; index < a.size; index++) \ + EXPECT_FLOAT_EQ(a[index], b[index]); + +#define compare_vector_vector_near(a, b, abserror) \ + for (size_t index = 0; index < a.size; index++) \ + EXPECT_NEAR(a[index], b[index], abserror); + +#define basic_test_vv(a, b, op) \ + VALIDATECPU \ + avxf c = a op b; \ + for (size_t i = 0; i < a.size; i++) \ + EXPECT_FLOAT_EQ(c[i], a[i] op b[i]); + +/* vector op float tests */ +#define basic_test_vf(a, b, op) \ + VALIDATECPU \ + avxf c = a op b; \ + for (size_t i = 0; i < a.size; i++) \ + EXPECT_FLOAT_EQ(c[i], a[i] op b); + +const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); +const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); +const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f); +const float float_b = 1.5f; + +TEST(util_avx, avxf_add_vv){basic_test_vv(avxf_a, avxf_b, +)} TEST(util_avx, avxf_sub_vv){ + basic_test_vv(avxf_a, avxf_b, -)} TEST(util_avx, avxf_mul_vv){ + basic_test_vv(avxf_a, avxf_b, *)} TEST(util_avx, avxf_div_vv){ + basic_test_vv(avxf_a, avxf_b, /)} TEST(util_avx, avxf_add_vf){ + basic_test_vf(avxf_a, float_b, +)} TEST(util_avx, avxf_sub_vf){ + basic_test_vf(avxf_a, float_b, -)} TEST(util_avx, avxf_mul_vf){ + basic_test_vf(avxf_a, float_b, *)} TEST(util_avx, + avxf_div_vf){basic_test_vf(avxf_a, float_b, /)} + 
+TEST(util_avx, avxf_ctor) +{ + VALIDATECPU + compare_vector_scalar(avxf(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f), + static_cast<float>(index)); + compare_vector_scalar(avxf(1.0f), 1.0f); + compare_vector_vector(avxf(1.0f, 2.0f), avxf(1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f)); + compare_vector_vector(avxf(1.0f, 2.0f, 3.0f, 4.0f), + avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f)); + compare_vector_vector(avxf(make_float3(1.0f, 2.0f, 3.0f)), + avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f)); +} + +TEST(util_avx, avxf_sqrt) +{ + VALIDATECPU + compare_vector_vector(mm256_sqrt(avxf(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)), + avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)); +} + +TEST(util_avx, avxf_min_max) +{ + VALIDATECPU + compare_vector_vector(min(avxf_a, avxf_b), avxf_a); + compare_vector_vector(max(avxf_a, avxf_b), avxf_b); +} + +TEST(util_avx, avxf_set_sign) +{ + VALIDATECPU + avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a); + compare_vector_vector(res, avxf(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, -0.8f)); +} + +TEST(util_avx, avxf_msub) +{ + VALIDATECPU + avxf res = msub(avxf_a, avxf_b, avxf_c); + avxf exp = avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7], + (avxf_a[6] * avxf_b[6]) - avxf_c[6], + (avxf_a[5] * avxf_b[5]) - avxf_c[5], + (avxf_a[4] * avxf_b[4]) - avxf_c[4], + (avxf_a[3] * avxf_b[3]) - avxf_c[3], + (avxf_a[2] * avxf_b[2]) - avxf_c[2], + (avxf_a[1] * avxf_b[1]) - avxf_c[1], + (avxf_a[0] * avxf_b[0]) - avxf_c[0]); + compare_vector_vector(res, exp); +} + +TEST(util_avx, avxf_madd) +{ + VALIDATECPU + avxf res = madd(avxf_a, avxf_b, avxf_c); + avxf exp = avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7], + (avxf_a[6] * avxf_b[6]) + avxf_c[6], + (avxf_a[5] * avxf_b[5]) + avxf_c[5], + (avxf_a[4] * avxf_b[4]) + avxf_c[4], + (avxf_a[3] * avxf_b[3]) + avxf_c[3], + (avxf_a[2] * avxf_b[2]) + avxf_c[2], + (avxf_a[1] * avxf_b[1]) + avxf_c[1], + (avxf_a[0] * avxf_b[0]) + avxf_c[0]); + compare_vector_vector(res, exp); +} + 
+/* nmadd(a, b, c) is expected to equal c - a * b per element. */
+TEST(util_avx, avxf_nmadd)
+{
+  VALIDATECPU
+  const avxf actual = nmadd(avxf_a, avxf_b, avxf_c);
+  const avxf expected(avxf_c[7] - avxf_a[7] * avxf_b[7],
+                      avxf_c[6] - avxf_a[6] * avxf_b[6],
+                      avxf_c[5] - avxf_a[5] * avxf_b[5],
+                      avxf_c[4] - avxf_a[4] * avxf_b[4],
+                      avxf_c[3] - avxf_a[3] * avxf_b[3],
+                      avxf_c[2] - avxf_a[2] * avxf_b[2],
+                      avxf_c[1] - avxf_a[1] * avxf_b[1],
+                      avxf_c[0] - avxf_a[0] * avxf_b[0]);
+  compare_vector_vector(actual, expected);
+}
+
+/* `<=` on two vectors: each element of the result is compared against -1 (true) or 0 (false). */
+TEST(util_avx, avxf_compare)
+{
+  VALIDATECPU
+  avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
+  avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
+  /* Deliberately left non-const, exactly as before, so the same operator[] overload is used. */
+  avxb mask = a <= b;
+  int expected[8];
+  for (size_t lane = 0; lane < 8; lane++) {
+    expected[lane] = (a[lane] <= b[lane]) ? -1 : 0;
+  }
+  compare_vector_vector(mask, expected);
+}
+
+TEST(util_avx, avxf_permute)
+{
+  VALIDATECPU
+  const avxf actual = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);
+  const avxf expected(4.0f, 6.0f, 3.0f, 2.0f, 1.0f, 7.0f, 8.0f, 5.0f);
+  compare_vector_vector(actual, expected);
+}
+
+TEST(util_avx, avxf_blend)
+{
+  VALIDATECPU
+  const avxf actual = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);
+  const avxf expected(0.1f, 0.2f, 3.0f, 0.4f, 5.0f, 0.6f, 7.0f, 0.8f);
+  compare_vector_vector(actual, expected);
+}
+
+TEST(util_avx, avxf_shuffle)
+{
+  VALIDATECPU
+  const avxf actual = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
+  const avxf expected(0.4f, 0.2f, 0.1f, 0.3f, 0.5f, 0.6f, 0.7f, 0.8f);
+  compare_vector_vector(actual, expected);
+}
+
+/* The expected values are tiny rounding residues, so an absolute tolerance is used. */
+TEST(util_avx, avxf_cross)
+{
+  VALIDATECPU
+  const avxf actual = cross(avxf_b, avxf_c);
+  const avxf expected(0.0f,
+                      -9.5367432e-07f,
+                      0.0f,
+                      4.7683716e-07f,
+                      0.0f,
+                      -3.8146973e-06f,
+                      3.8146973e-06f,
+                      3.8146973e-06f);
+  compare_vector_vector_near(actual, expected, 0.000002000f);
+}
+
+/* dot3 writes two scalar results through its last two arguments. */
+TEST(util_avx, avxf_dot3)
+{
+  VALIDATECPU
+  float dot_first;
+  float dot_second;
+  dot3(avxf_a, avxf_b, dot_first, dot_second);
+  EXPECT_FLOAT_EQ(dot_first, 14.9f);
+  EXPECT_FLOAT_EQ(dot_second, 2.9f);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/CMakeLists.txt
b/intern/cycles/util/CMakeLists.txt index ef100c12453..c1f71461dfd 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -102,6 +102,7 @@ set(SRC_HEADERS util_sky_model_data.h util_avxf.h util_avxb.h + util_semaphore.h util_sseb.h util_ssef.h util_ssei.h diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h index 62093039625..63abd4e92a3 100644 --- a/intern/cycles/util/util_algorithm.h +++ b/intern/cycles/util/util_algorithm.h @@ -25,6 +25,7 @@ using std::max; using std::min; using std::remove; using std::sort; +using std::stable_sort; using std::swap; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index a8ea1dc925e..13d177d2b25 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -77,6 +77,7 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float # define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) # define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) # define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) +# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x)) # define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE # define ccl_barrier(flags) barrier(flags) @@ -91,6 +92,7 @@ ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float # define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) # define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) # define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) +# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x)) ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, const float old_val, diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h index b5c3f1a8954..7fab7bd5a15 100644 --- a/intern/cycles/util/util_boundbox.h +++ 
b/intern/cycles/util/util_boundbox.h @@ -17,8 +17,8 @@ #ifndef __UTIL_BOUNDBOX_H__ #define __UTIL_BOUNDBOX_H__ -#include <math.h> #include <float.h> +#include <math.h> #include "util/util_math.h" #include "util/util_string.h" diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h index b29d4163133..24a20a969ab 100644 --- a/intern/cycles/util/util_defines.h +++ b/intern/cycles/util/util_defines.h @@ -15,6 +15,11 @@ * limitations under the License. */ +/* clang-format off */ + +/* #define __forceinline triggers a bug in some clang-format versions, disable + * format for entire file to keep results consistent. */ + #ifndef __UTIL_DEFINES_H__ #define __UTIL_DEFINES_H__ diff --git a/intern/cycles/util/util_disjoint_set.h b/intern/cycles/util/util_disjoint_set.h index 80f3c714a29..946632371d2 100644 --- a/intern/cycles/util/util_disjoint_set.h +++ b/intern/cycles/util/util_disjoint_set.h @@ -17,8 +17,8 @@ #ifndef __UTIL_DISJOINT_SET_H__ #define __UTIL_DISJOINT_SET_H__ -#include <utility> #include "util_array.h" +#include <utility> CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index 647e9cf2fd6..8de62893ba8 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -17,8 +17,8 @@ #ifndef __UTIL_HALF_H__ #define __UTIL_HALF_H__ -#include "util/util_types.h" #include "util/util_math.h" +#include "util/util_types.h" #ifdef __KERNEL_SSE2__ # include "util/util_simd.h" diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp index c11f495f785..0df521c2b58 100644 --- a/intern/cycles/util/util_md5.cpp +++ b/intern/cycles/util/util_md5.cpp @@ -26,8 +26,8 @@ #include "util_md5.h" #include "util_path.h" -#include <string.h> #include <stdio.h> +#include <string.h> CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 77293c45f6b..8905c8bc7f0 100644 --- a/intern/cycles/util/util_path.cpp +++ 
b/intern/cycles/util/util_path.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "util/util_md5.h" #include "util/util_path.h" +#include "util/util_md5.h" #include "util/util_string.h" #include <OpenImageIO/filesystem.h> @@ -36,8 +36,8 @@ OIIO_NAMESPACE_USING # define DIR_SEP '/' # include <dirent.h> # include <pwd.h> -# include <unistd.h> # include <sys/types.h> +# include <unistd.h> #endif #ifdef HAVE_SHLWAPI_H diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp index bbefbadd0fe..073b09f719f 100644 --- a/intern/cycles/util/util_profiling.cpp +++ b/intern/cycles/util/util_profiling.cpp @@ -14,8 +14,9 @@ * limitations under the License. */ -#include "util/util_algorithm.h" #include "util/util_profiling.h" +#include "util/util_algorithm.h" +#include "util/util_foreach.h" #include "util/util_set.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h index f5f500239f2..ceec08ed894 100644 --- a/intern/cycles/util/util_profiling.h +++ b/intern/cycles/util/util_profiling.h @@ -19,7 +19,6 @@ #include <atomic> -#include "util/util_foreach.h" #include "util/util_map.h" #include "util/util_thread.h" #include "util/util_vector.h" diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index 379beaeeefa..26534a29dfe 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -25,8 +25,8 @@ #include "util/util_function.h" #include "util/util_string.h" -#include "util/util_time.h" #include "util/util_thread.h" +#include "util/util_time.h" CCL_NAMESPACE_BEGIN @@ -204,6 +204,8 @@ class Progress { float get_progress() { + thread_scoped_lock lock(progress_mutex); + if (total_pixel_samples > 0) { return ((float)pixel_samples) / total_pixel_samples; } diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h new file mode 100644 index 00000000000..d995b0732b8 --- /dev/null +++ 
b/intern/cycles/util/util_semaphore.h @@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_SEMAPHORE_H__
+#define __UTIL_SEMAPHORE_H__
+
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Counting Semaphore
+ *
+ * To restrict concurrent access to a resource to a specified number
+ * of threads. Similar to std::counting_semaphore from C++20.
+ *
+ * Every access to `count` happens with `mutex` held. */
+
+class thread_counting_semaphore {
+ public:
+  /* `count` is the number of acquire() calls that can complete before any release(). */
+  explicit thread_counting_semaphore(const int count) : count(count)
+  {
+  }
+
+  thread_counting_semaphore(const thread_counting_semaphore &) = delete;
+
+  /* Take one slot, blocking on `condition` until the counter is positive. */
+  void acquire()
+  {
+    thread_scoped_lock lock(mutex);
+    /* Re-check after every wake-up. `<=` rather than `==` so that a counter that is
+     * (or was constructed) negative blocks instead of being decremented further. */
+    while (count <= 0) {
+      condition.wait(lock);
+    }
+    count--;
+  }
+
+  /* Give one slot back and notify one thread waiting in acquire(). */
+  void release()
+  {
+    thread_scoped_lock lock(mutex);
+    count++;
+    condition.notify_one();
+  }
+
+ protected:
+  /* Guards `count`. */
+  thread_mutex mutex;
+  /* Signalled by release(), waited on by acquire(). */
+  thread_condition_variable condition;
+  /* Number of slots currently available. */
+  int count;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SEMAPHORE_H__ */
diff --git a/intern/cycles/util/util_sky_model.cpp b/intern/cycles/util/util_sky_model.cpp index 4a6a9f32607..8cdad8a90a4 100644 --- a/intern/cycles/util/util_sky_model.cpp +++ b/intern/cycles/util/util_sky_model.cpp @@ -101,9 +101,9 @@ All instructions on how to use this code are in the accompanying header file.
#include "util/util_sky_model_data.h" #include <assert.h> +#include <math.h> #include <stdio.h> #include <stdlib.h> -#include <math.h> CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index fa525daf37c..e9f0efb4efb 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -18,6 +18,8 @@ #ifndef __UTIL_SSEF_H__ #define __UTIL_SSEF_H__ +#include "util_ssei.h" + CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h index ceb52830319..d809f2e06d7 100644 --- a/intern/cycles/util/util_static_assert.h +++ b/intern/cycles/util/util_static_assert.h @@ -14,34 +14,20 @@ * limitations under the License. */ +/* clang-format off */ + +/* #define static_assert triggers a bug in some clang-format versions, disable + * format for entire file to keep results consistent. */ + #ifndef __UTIL_STATIC_ASSERT_H__ #define __UTIL_STATIC_ASSERT_H__ CCL_NAMESPACE_BEGIN -/* TODO(sergey): In theory CUDA might work with own static assert - * implementation since it's just pure C++. - */ -#ifdef __KERNEL_GPU__ -# ifndef static_assert -# define static_assert(statement, message) -# endif -#endif /* __KERNEL_GPU__ */ - -/* TODO(sergey): For until C++11 is a bare minimum for us, - * we do a bit of a trickery to show meaningful message so - * it's more or less clear what's wrong when building without - * C++11. - * - * The thing here is: our non-C++11 implementation doesn't - * have a way to print any message after preprocessor - * substitution so we rely on the message which is passed to - * static_assert() since that's the only message visible when - * compilation fails. 
- * - * After C++11 bump it should be possible to glue structure - * name to the error message, - */ +#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC) +# define static_assert(statement, message) +#endif /* __KERNEL_OPENCL__ */ + #define static_assert_align(st, align) \ static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h index f71145741c9..ce2d4acdde4 100644 --- a/intern/cycles/util/util_string.h +++ b/intern/cycles/util/util_string.h @@ -17,9 +17,9 @@ #ifndef __UTIL_STRING_H__ #define __UTIL_STRING_H__ +#include <sstream> #include <string.h> #include <string> -#include <sstream> #include "util/util_vector.h" diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index f700f9bd277..6d32153209a 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -17,8 +17,8 @@ #include "util/util_system.h" #include "util/util_logging.h" -#include "util/util_types.h" #include "util/util_string.h" +#include "util/util_types.h" #include <numaapi.h> @@ -35,8 +35,8 @@ OIIO_NAMESPACE_USING # include <sys/sysctl.h> # include <sys/types.h> #else -# include <unistd.h> # include <sys/ioctl.h> +# include <unistd.h> #endif CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index 24286116dfb..61aa28c6815 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -14,10 +14,10 @@ * limitations under the License. 
*/ +#include "util/util_task.h" #include "util/util_foreach.h" #include "util/util_logging.h" #include "util/util_system.h" -#include "util/util_task.h" #include "util/util_time.h" //#define THREADING_DEBUG_ENABLED diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index d43852480d1..863c2ea3124 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -17,6 +17,8 @@ #ifndef __UTIL_TEXTURE_H__ #define __UTIL_TEXTURE_H__ +#include "util_transform.h" + CCL_NAMESPACE_BEGIN /* Texture limits on devices. */ @@ -91,12 +93,17 @@ typedef enum ExtensionType { typedef struct TextureInfo { /* Pointer, offset or texture depending on device. */ uint64_t data; + /* Data Type */ + uint data_type; /* Buffer number for OpenCL. */ uint cl_buffer; /* Interpolation and extension type. */ uint interpolation, extension; /* Dimensions. */ uint width, height, depth; + /* Transform for 3D textures. */ + uint use_transform_3d; + Transform transform_3d; } TextureInfo; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index 18ec5b32144..f6dbc9186b8 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -17,11 +17,11 @@ #ifndef __UTIL_THREAD_H__ #define __UTIL_THREAD_H__ -#include <thread> -#include <mutex> #include <condition_variable> #include <functional> +#include <mutex> #include <queue> +#include <thread> #ifdef _WIN32 # include "util_windows.h" diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 302a8a386ac..101122740d7 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -46,8 +46,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "util/util_projection.h" #include "util/util_transform.h" +#include "util/util_projection.h" #include "util/util_boundbox.h" #include "util/util_math.h" diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index 407654245cb..d0a6264d5cf 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -344,10 +344,10 @@ ccl_device_inline Transform transform_empty() ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) { - /* use simpe nlerp instead of slerp. it's faster and almost the same */ + /* Optix is using lerp to interpolate motion transformations. */ +#ifdef __KERNEL_OPTIX__ return normalize((1.0f - t) * q1 + t * q2); - -#if 0 +#else /* __KERNEL_OPTIX__ */ /* note: this does not ensure rotation around shortest angle, q1 and q2 * are assumed to be matched already in transform_motion_decompose */ float costheta = dot(q1, q2); @@ -365,7 +365,7 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) float thetap = theta * t; return q1 * cosf(thetap) + qperp * sinf(thetap); } -#endif +#endif /* __KERNEL_OPTIX__ */ } ccl_device_inline Transform transform_quick_inverse(Transform M) @@ -468,29 +468,6 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm, #ifndef __KERNEL_GPU__ -# ifdef WITH_EMBREE -ccl_device void transform_motion_array_interpolate_straight( - Transform *tfm, const ccl_global DecomposedTransform *motion, uint numsteps, float time) -{ - /* Figure out which steps we need to interpolate. 
*/ - int maxstep = numsteps - 1; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; - - const ccl_global DecomposedTransform *a = motion + step; - const ccl_global DecomposedTransform *b = motion + step + 1; - Transform step1, step2; - - transform_compose(&step1, a); - transform_compose(&step2, b); - - /* matrix lerp */ - tfm->x = (1.0f - t) * step1.x + t * step2.x; - tfm->y = (1.0f - t) * step1.y + t * step2.y; - tfm->z = (1.0f - t) * step1.z + t * step2.z; -} -# endif - class BoundBox2D; ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B) diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 48e9983ac8f..f6535848480 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -101,6 +101,11 @@ ccl_device_inline size_t round_down(size_t x, size_t multiple) return (x / multiple) * multiple; }
+/* Return true when `x` has exactly one bit set, i.e. x is 1, 2, 4, 8, ...
+ * `x & (x - 1)` clears the lowest set bit, so it is zero for powers of two, but also
+ * for x == 0, which is rejected explicitly because zero is not a power of two. */
+ccl_device_inline bool is_power_of_two(size_t x)
+{
+  return x != 0 && (x & (x - 1)) == 0;
+}
+
CCL_NAMESPACE_END /* Vectorized types declaration. */ @@ -148,8 +153,8 @@ CCL_NAMESPACE_END /* SSE types. */ #ifndef __KERNEL_GPU__ # include "util/util_sseb.h" -# include "util/util_ssei.h" # include "util/util_ssef.h" +# include "util/util_ssei.h" # if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) # include "util/util_avxb.h" # include "util/util_avxf.h" diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h index 38829d3a29c..bb2c99cc6d7 100644 --- a/intern/cycles/util/util_version.h +++ b/intern/cycles/util/util_version.h @@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN #define CYCLES_VERSION_MAJOR 1 -#define CYCLES_VERSION_MINOR 9 +#define CYCLES_VERSION_MINOR 12 #define CYCLES_VERSION_PATCH 0 #define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." 
#c diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp index f23174fd6dc..9d9ff451b3b 100644 --- a/intern/cycles/util/util_view.cpp +++ b/intern/cycles/util/util_view.cpp @@ -134,7 +134,7 @@ static void view_display() glMatrixMode(GL_PROJECTION); glLoadIdentity(); - gluOrtho2D(0, V.width, 0, V.height); + glOrtho(0, V.width, 0, V.height, -1, 1); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); |