diff options
70 files changed, 1305 insertions, 225 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 0c3af3fabeb..c629ba82cf5 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -808,6 +808,82 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): items=enum_texture_limit ) + use_texture_cache: BoolProperty( + name="Use Texture Cache", + default=False, + description="Enables out-of-core texturing to conserve RAM" + ) + + texture_cache_size: IntProperty( + name="Texture Cache Size (MB)", + default=1024, + description="The size of the OpenImageIO texture cache in MB", + min=0 + ) + + texture_auto_convert: BoolProperty( + name="Auto Convert Textures", + default=True, + description="Automatically convert textures to .tx files for optimal texture cache performance" + ) + + texture_accept_unmipped: BoolProperty( + name="Accept Unmipped", + default=True, + description="Texture cached rendering without mip mapping is very expensive. Uncheck to prevent Cycles from using textures that are not mip mapped" + ) + + texture_accept_untiled: BoolProperty( + name="Accept Untiled", + default=True, + description="Texture cached rendering without tiled textures is very expensive. Uncheck to prevent Cycles from using textures that are not tiled" + ) + + texture_auto_tile: BoolProperty( + name="Auto Tile", + default=True, + description="On the fly creation of tiled versions of textures that are not tiled. This can increase render time but helps reduce memory usage" + ) + + texture_auto_mip: BoolProperty( + name="Auto Mip", + default=True, + description="On the fly creation of mip maps of textures that are not mip mapped. 
This can increase render time but helps reduce memory usage" + ) + + texture_tile_size: IntProperty( + name="Tile Size", + default=64, + description="The size of tiles that Cycles uses for auto tiling" + ) + + texture_blur_diffuse: FloatProperty( + name="Diffuse Blur", + default=0.0156, + description="The amount of texture blur applied to diffuse bounces", + min = 0.0, max = 1.0 + ) + + texture_blur_glossy: FloatProperty( + name="Glossy Blur", + default=0.0, + description="The amount of texture blur applied to glossy bounces", + min = 0.0, max = 1.0 + ) + + use_custom_cache_path: BoolProperty( + name="Use Custom Cache Path", + default=False, + description="Use a custom path for the texture cache, as opposed to placing cache files next to the original file" + ) + + custom_cache_path: StringProperty( + name="Custom Cache Path", + default="", + subtype="DIR_PATH", + description="Custom path for the texture cache" + ) + use_fast_gi: BoolProperty( name="Fast GI Approximation", description="Approximate diffuse indirect light with background tinted ambient occlusion. 
This provides fast alternative to full global illumination, for interactive viewport rendering or final renders with reduced quality", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 47f7b4c6d73..46d67f27e9c 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -781,6 +781,40 @@ class CYCLES_RENDER_PT_performance_final_render(CyclesButtonsPanel, Panel): col.prop(rd, "use_save_buffers") col.prop(rd, "use_persistent_data", text="Persistent Data") +class CYCLES_RENDER_PT_texture_cache(CyclesButtonsPanel, Panel): + bl_label = "Texture Cache" + bl_options = {'DEFAULT_CLOSED'} + + def draw_header(self, context): + cscene = context.scene.cycles + + self.layout.prop(cscene, "use_texture_cache", text="") + + def draw(self, context): + layout = self.layout + + scene = context.scene + cscene = scene.cycles + rd = scene.render + layout.active = cscene.use_texture_cache + + split = layout.split() + col = split.column() + col.prop(cscene, "texture_auto_convert") + col.prop(cscene, "texture_accept_unmipped") + col.prop(cscene, "texture_accept_untiled") + col.prop(cscene, "texture_auto_mip") + col.prop(cscene, "texture_auto_tile") + col = split.column() + col.prop(cscene, "texture_cache_size") + col.prop(cscene, "texture_tile_size") + col.prop(cscene, "texture_blur_diffuse") + col.prop(cscene, "texture_blur_glossy") + row = layout.row() + row.prop(cscene, "use_custom_cache_path") + row = layout.row() + row.active = cscene.use_custom_cache_path + row.prop(cscene, "custom_cache_path") class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel): bl_label = "Viewport" @@ -2307,6 +2341,7 @@ classes = ( CYCLES_RENDER_PT_performance_acceleration_structure, CYCLES_RENDER_PT_performance_final_render, CYCLES_RENDER_PT_performance_viewport, + CYCLES_RENDER_PT_texture_cache, CYCLES_RENDER_PT_passes, CYCLES_RENDER_PT_passes_data, CYCLES_RENDER_PT_passes_light, diff --git 
a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 6e06b6a468f..eabf73bf72e 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -24,6 +24,7 @@ #include "blender/blender_util.h" #include "render/denoising.h" +#include "render/image_oiio.h" #include "render/merge.h" #include "util/util_debug.h" @@ -691,6 +692,22 @@ static PyObject *osl_compile_func(PyObject * /*self*/, PyObject *args) } #endif +static PyObject *oiio_make_tx(PyObject * /*self*/, PyObject *args) +{ + const char *inputfile = NULL, *outputfile = NULL, *colorspace = NULL; + int extension = EXTENSION_CLIP; + + if (!PyArg_ParseTuple(args, "sssi", &inputfile, &outputfile, &colorspace, &extension)) + return NULL; + + /* Call make_tx and return its success to Python as True/False. */ + if (!OIIOImageLoader::make_tx( + inputfile, outputfile, ustring(colorspace), (ExtensionType)extension)) + Py_RETURN_FALSE; + + Py_RETURN_TRUE; +} + static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/) { string system_info = Device::device_capabilities(); @@ -1080,6 +1097,7 @@ static PyMethodDef methods[] = { {"osl_update_node", osl_update_node_func, METH_VARARGS, ""}, {"osl_compile", osl_compile_func, METH_VARARGS, ""}, #endif + {"oiio_make_tx", oiio_make_tx, METH_VARARGS, ""}, {"available_devices", available_devices_func, METH_VARARGS, ""}, {"system_info", system_info_func, METH_NOARGS, ""}, #ifdef WITH_OPENCL diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 26d64b7bf85..f480bbfc5c9 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -802,6 +802,27 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background) params.bvh_layout = DebugFlags().cpu.bvh_layout; + params.texture.use_cache = RNA_boolean_get(&cscene, "use_texture_cache"); + params.texture.cache_size = RNA_int_get(&cscene, "texture_cache_size"); + params.texture.auto_convert = 
RNA_boolean_get(&cscene, "texture_auto_convert"); + params.texture.accept_unmipped = RNA_boolean_get(&cscene, "texture_accept_unmipped"); + params.texture.accept_untiled = RNA_boolean_get(&cscene, "texture_accept_untiled"); + params.texture.tile_size = RNA_int_get(&cscene, "texture_tile_size"); + params.texture.auto_mip = RNA_boolean_get(&cscene, "texture_auto_mip"); + params.texture.auto_tile = RNA_boolean_get(&cscene, "texture_auto_tile"); + params.texture.diffuse_blur = RNA_float_get(&cscene, "texture_blur_diffuse"); + params.texture.glossy_blur = RNA_float_get(&cscene, "texture_blur_glossy"); + params.texture.use_custom_cache_path = RNA_boolean_get(&cscene, "use_custom_cache_path"); + if (params.texture.use_custom_cache_path) { + char *path = RNA_string_get_alloc(&cscene, "custom_cache_path", NULL, 0); + if (path) { + params.texture.custom_cache_path = path; + MEM_freeN(path); + } + } + else { + params.texture.custom_cache_path.clear(); + } params.background = background; return params; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index ecf79bcdfa6..3c50f0d1755 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -376,6 +376,12 @@ class Device { return NULL; } + /* open image io, only for CPU device */ + virtual void *oiio_memory() + { + return NULL; + } + /* load/compile kernels, must be called before adding tasks */ virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/) { diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4a6e77d6eaa..be09779bd89 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -39,6 +39,7 @@ #include "kernel/kernel_types.h" #include "kernel/split/kernel_split_data.h" #include "kernel/kernel_globals.h" +#include "kernel/kernel_oiio_globals.h" #include "kernel/kernel_adaptive_sampling.h" #include "kernel/filter/filter.h" @@ -184,6 +185,9 @@ class CPUDevice : public Device { 
#ifdef WITH_OSL OSLGlobals osl_globals; #endif + + OIIOGlobals oiio_globals; + #ifdef WITH_OPENIMAGEDENOISE oidn::DeviceRef oidn_device; oidn::FilterRef oidn_filter; @@ -312,6 +316,9 @@ class CPUDevice : public Device { #ifdef WITH_OSL kernel_globals.osl = &osl_globals; #endif + oiio_globals.tex_sys = NULL; + kernel_globals.oiio = &oiio_globals; + #ifdef WITH_EMBREE embree_device = rtcNewDevice("verbose=0"); #endif @@ -357,6 +364,12 @@ class CPUDevice : public Device { #endif task_pool.cancel(); texture_info.free(); + if (oiio_globals.tex_sys) { + VLOG(1) << oiio_globals.tex_sys->getstats(); + oiio_globals.tex_sys->reset_stats(); + TextureSystem::destroy(oiio_globals.tex_sys); + } + kernel_globals.oiio = NULL; } virtual bool show_samples() const override @@ -548,6 +561,11 @@ class CPUDevice : public Device { #endif } + void *oiio_memory() override + { + return &oiio_globals; + } + void build_bvh(BVH *bvh, Progress &progress, bool refit) override { #ifdef WITH_EMBREE @@ -1486,6 +1504,10 @@ class CPUDevice : public Device { #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif + if (kg.oiio && kg.oiio->tex_sys) { + kg.oiio_tdata = kg.oiio->tex_sys->get_perthread_info(); + } + return kg; } diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 80a05fc32fe..de42c32e5e4 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -231,6 +231,12 @@ device_texture::device_texture(Device *device, data_type = TYPE_UINT16; data_elements = 1; break; + case IMAGE_DATA_TYPE_OIIO: + /* Assumes 64 bit pointers to be stored as uint. 
*/ + static_assert(sizeof(void*) == sizeof(uint64_t)); + data_type = TYPE_UINT64; + data_elements = 1; + break; case IMAGE_DATA_NUM_TYPES: assert(0); return; diff --git a/intern/cycles/graph/node_type.h b/intern/cycles/graph/node_type.h index 8b37398fa17..4702a8a3337 100644 --- a/intern/cycles/graph/node_type.h +++ b/intern/cycles/graph/node_type.h @@ -79,7 +79,10 @@ struct SocketType { LINK_NORMAL = (1 << 8), LINK_POSITION = (1 << 9), LINK_TANGENT = (1 << 10), - DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10) + LINK_TEXTURE_DX = (1 << 11), + LINK_TEXTURE_DY = (1 << 12), + DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | + (1 << 10) | (1 << 11) | (1 << 12) }; ustring name; diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 6f2f2ebb202..51a7013df1f 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -211,7 +211,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, omega_in, &domega_in->dx, &domega_in->dy, - pdf); + pdf, + sd); break; case CLOSURE_BSDF_REFRACTION_ID: label = bsdf_refraction_sample(sc, @@ -225,7 +226,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, omega_in, &domega_in->dx, &domega_in->dy, - pdf); + pdf, + sd); break; case CLOSURE_BSDF_TRANSPARENT_ID: label = bsdf_transparent_sample(sc, @@ -257,7 +259,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, omega_in, &domega_in->dx, &domega_in->dy, - pdf); + pdf, + sd); break; case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: @@ -291,7 +294,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, &domega_in->dx, &domega_in->dy, pdf, - &sd->lcg_state); + &sd->lcg_state, + sd); break; case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: @@ -307,7 +311,8 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, omega_in, &domega_in->dx, 
&domega_in->dy, - pdf); + pdf, + sd); break; case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: label = bsdf_ashikhmin_shirley_sample(sc, diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h index 3d3f20edab3..e09eed928ea 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h @@ -161,8 +161,10 @@ ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the retroreflective bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; + *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index ea604ed0311..0cf5c398bed 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -101,8 +101,10 @@ ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, *eval = make_float3(*pdf, *pdf, *pdf); #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; + *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else @@ -163,8 +165,10 @@ ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, *eval = make_float3(*pdf, *pdf, *pdf); #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); + *domega_in_dx = -((2.0f * dot(N, dIdx)) * N - dIdx); + *domega_in_dy = -((2.0f * dot(N, dIdy)) * N 
- dIdy); + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else { diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index aa62c1c7ceb..0536ecf0f12 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -112,8 +112,10 @@ ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, if (dot(Ng, *omega_in) > 0.0f) { *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F; # ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; + *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; # endif } else diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index 7ca9424b815..f9179e82d42 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -228,8 +228,10 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, // differentials - TODO: find a better approximation for the reflective bounce #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; - *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; + *domega_in_dx = 2.0f * dot(locy, dIdx) * locy - dIdx; + *domega_in_dy = 2.0f * dot(locy, dIdy) * locy - dIdy; + *domega_in_dx *= 10.0f; + *domega_in_dy *= 10.0f; #endif *pdf = fabsf(phi_pdf * theta_pdf); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index aed4b849aca..9fddbb83a02 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -570,7 +570,8 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, - float *pdf) + float *pdf, 
+ const ShaderData *sd) { const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; float alpha_x = bsdf->alpha_x; @@ -699,8 +700,20 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, } #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; - *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; +# ifdef __DNDU__ + float3 dwodx = -dIdx; + float3 dwody = -dIdy; + float dDNdx = dot(dwodx, N) + dot(I, sd->dNdx); + float dDNdy = dot(dwody, N) + dot(I, sd->dNdy); + *domega_in_dx = dwodx + 2.0f * (dot(I, N) * sd->dNdx + dDNdx * N); + *domega_in_dy = dwody + 2.0f * (dot(I, N) * sd->dNdy + dDNdy * N); +# else + *domega_in_dx = (2.0f * dot(m, dIdx)) * m - dIdx; + *domega_in_dy = (2.0f * dot(m, dIdy)) * m - dIdy; +# endif + const float softness = min(alpha_x, alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif } } @@ -723,6 +736,10 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, &R, &T, #ifdef __RAY_DIFFERENTIALS__ +# ifdef __DNDU__ + sd->dNdx, + sd->dNdy, +# endif dIdx, dIdy, &dRdx, @@ -738,6 +755,9 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = dTdx; *domega_in_dy = dTdy; + const float softness = min(alpha_x, alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { @@ -998,7 +1018,8 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, - float *pdf) + float *pdf, + const ShaderData *sd) { const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; float alpha_x = bsdf->alpha_x; @@ -1090,8 +1111,20 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, } #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; - *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; +# ifdef __DNDU__ + float3 dwodx = -dIdx; + float3 
dwody = -dIdy; + float dDNdx = dot(dwodx, N) + dot(I, sd->dNdx); + float dDNdy = dot(dwody, N) + dot(I, sd->dNdy); + *domega_in_dx = dwodx + 2.f * (dot(I, N) * sd->dNdx + dDNdx * N); + *domega_in_dy = dwody + 2.f * (dot(I, N) * sd->dNdy + dDNdy * N); +# else + *domega_in_dx = (2.0f * dot(m, dIdx)) * m - dIdx; + *domega_in_dy = (2.0f * dot(m, dIdy)) * m - dIdy; +# endif + const float softness = min(alpha_x, alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif } } @@ -1114,6 +1147,10 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, &R, &T, #ifdef __RAY_DIFFERENTIALS__ +# ifdef __DNDU__ + sd->dNdx, + sd->dNdy, +# endif dIdx, dIdy, &dRdx, @@ -1129,6 +1166,9 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = dTdx; *domega_in_dy = dTdy; + const float softness = min(alpha_x, alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 9795c8da065..fef145798fe 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -526,8 +526,11 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, *omega_in = X * localO.x + Y * localO.y + Z * localO.z; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; + *domega_in_dx = (2.0f * dot(Z, dIdx)) * Z - dIdx; + *domega_in_dy = (2.0f * dot(Z, dIdy)) * Z - dIdy; + const float softness = min(bsdf->alpha_x, bsdf->alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif return LABEL_REFLECT | LABEL_GLOSSY; } @@ -641,7 +644,8 @@ ccl_device int 
bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, - ccl_addr_space uint *lcg_state) + ccl_addr_space uint *lcg_state, + const ShaderData *sd) { const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; @@ -660,6 +664,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, &R, &T, #ifdef __RAY_DIFFERENTIALS__ +# ifdef __DNDU__ + sd->dNdx, + sd->dNdy, +# endif dIdx, dIdy, &dRdx, @@ -713,6 +721,9 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, #ifdef __RAY_DIFFERENTIALS__ *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; + const float softness = min(bsdf->alpha_x, bsdf->alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif return LABEL_REFLECT | LABEL_GLOSSY; } @@ -724,6 +735,9 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z; *domega_in_dy = -(bsdf->ior * dIdy) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z; + const float softness = min(bsdf->alpha_x, bsdf->alpha_y) * 10.0f; + *domega_in_dx *= (1.0f + softness); + *domega_in_dy *= (1.0f + softness); #endif return LABEL_TRANSMIT | LABEL_GLOSSY; diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 41e5736bf49..dfb6b381508 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -116,6 +116,8 @@ ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, // TODO: find a better approximation for the bounce *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else { diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h 
b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index cf5484383f2..b1c75f16ccd 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -123,8 +123,10 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 R = (2 * cosNO) * bsdf->N - I; # ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx *= 10.0f; + *domega_in_dy *= 10.0f; # endif float3 T, B; diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h index 43646aaeb5b..6932be61830 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h @@ -125,8 +125,10 @@ ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc, #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); + *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else { diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h index 3707de29d73..de1093a3392 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h +++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h @@ -126,8 +126,10 @@ ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc, #ifdef __RAY_DIFFERENTIALS__ // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); + *domega_in_dx = (2.0f * 
dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else { diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h index c24ba170915..5e05e0a884e 100644 --- a/intern/cycles/kernel/closure/bsdf_reflection.h +++ b/intern/cycles/kernel/closure/bsdf_reflection.h @@ -70,7 +70,8 @@ ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, - float *pdf) + float *pdf, + const ShaderData *sd) { const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; float3 N = bsdf->N; @@ -81,8 +82,18 @@ ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, *omega_in = (2 * cosNO) * N - I; if (dot(Ng, *omega_in) > 0) { #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx; - *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy; +# ifdef __DNDU__ + /* as described in pbrt */ + float3 dwodx = -dIdx; + float3 dwody = -dIdy; + float dDNdx = dot(dwodx, N) + dot(I, sd->dNdx); + float dDNdy = dot(dwody, N) + dot(I, sd->dNdy); + *domega_in_dx = dwodx + 2.f * (dot(I, N) * sd->dNdx + dDNdx * N); + *domega_in_dy = dwody + 2.f * (dot(I, N) * sd->dNdy + dDNdy * N); +# else + *domega_in_dx = 2.0f * dot(N, dIdx) * N - dIdx; + *domega_in_dy = 2.0f * dot(N, dIdy) * N - dIdy; +# endif #endif /* Some high number for MIS. 
*/ *pdf = 1e6f; diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h index d4fbe86dac0..66265bab9b6 100644 --- a/intern/cycles/kernel/closure/bsdf_refraction.h +++ b/intern/cycles/kernel/closure/bsdf_refraction.h @@ -70,7 +70,8 @@ ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, - float *pdf) + float *pdf, + ShaderData *sd) { const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; float m_eta = bsdf->ior; @@ -88,6 +89,10 @@ ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, &R, &T, #ifdef __RAY_DIFFERENTIALS__ +# ifdef __DNDU__ + sd->dNdx, + sd->dNdy, +# endif dIdx, dIdy, &dRdx, diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h index cc5de21ed0e..05fa2fa891a 100644 --- a/intern/cycles/kernel/closure/bsdf_toon.h +++ b/intern/cycles/kernel/closure/bsdf_toon.h @@ -142,6 +142,8 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, // TODO: find a better approximation for the bounce *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx *= 125.0f; + *domega_in_dy *= 125.0f; #endif } else @@ -233,8 +235,10 @@ ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx *= 10.0f; + *domega_in_dy *= 10.0f; #endif } else diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index a73dee1b045..00773a39e69 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ 
b/intern/cycles/kernel/closure/bsdf_util.h @@ -41,6 +41,10 @@ ccl_device float fresnel_dielectric(float eta, float3 *R, float3 *T, #ifdef __RAY_DIFFERENTIALS__ +# ifdef __DNDU__ + const float3 dNdx, + const float3 dNdy, +# endif const float3 dIdx, const float3 dIdy, float3 *dRdx, @@ -69,14 +73,14 @@ ccl_device float fresnel_dielectric(float eta, } // compute reflection - *R = (2 * cos) * Nn - I; + *R = (2.0f * cos) * Nn - I; #ifdef __RAY_DIFFERENTIALS__ - *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx; - *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy; + *dRdx = (2.0f * dot(Nn, dIdx)) * Nn - dIdx; + *dRdy = (2.0f * dot(Nn, dIdy)) * Nn - dIdy; #endif float arg = 1 - (neta * neta * (1 - (cos * cos))); - if (arg < 0) { + if (arg < 0.0f) { *T = make_float3(0.0f, 0.0f, 0.0f); #ifdef __RAY_DIFFERENTIALS__ *dTdx = make_float3(0.0f, 0.0f, 0.0f); @@ -89,8 +93,16 @@ ccl_device float fresnel_dielectric(float eta, float nK = (neta * cos) - dnp; *T = -(neta * I) + (nK * Nn); #ifdef __RAY_DIFFERENTIALS__ - *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn; - *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn; +# ifndef __DNDU__ +# define dNdx make_float3(0.0f, 0.0f, 0.0f) +# define dNdy make_float3(0.0f, 0.0f, 0.0f) +# endif + float dDNdx = dot(dIdx, Nn) - dot(I, dNdx); + float dDNdy = dot(dIdy, Nn) - dot(I, dNdy); + float dmudx = (neta - neta * neta * cos / dnp) * dDNdx; + float dmudy = (neta - neta * neta * cos / dnp) * dDNdy; + *dTdx = -(neta * dIdx) + (nK * dNdx + dmudx * Nn); + *dTdy = -(neta * dIdy) + (nK * dNdy + dmudy * Nn); #endif // compute Fresnel terms float cosTheta1 = cos; // N.R diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h index 1430f712701..a55766915ad 100644 --- a/intern/cycles/kernel/closure/volume.h +++ b/intern/cycles/kernel/closure/volume.h @@ -90,8 +90,15 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, return make_float3(*pdf, *pdf, 
*pdf); } -ccl_device float3 -henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf) +ccl_device float3 henyey_greenstrein_sample(float3 D, + float g, + float randu, + float randv, + float *pdf, + float3 dIdx, + float3 dIdy, + float3 *domega_in_dx, + float3 *domega_in_dy) { /* match pdf for small g */ float cos_theta; @@ -113,12 +120,33 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pd float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); float phi = M_2PI_F * randv; - float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + float3 dir = make_float3(sin_theta * cos_phi, sin_theta * sin_phi, cos_theta); float3 T, B; make_orthonormals(D, &T, &B); dir = dir.x * T + dir.y * B + dir.z * D; +#ifdef __RAY_DIFFERENTIALS__ + if (domega_in_dx && domega_in_dy) { + if (pdf && *pdf < 1.0f) { + float spread = 0.125f / sqrtf(*pdf); + make_orthonormals(dir, &T, &B); + *domega_in_dx = spread * T; + *domega_in_dy = spread * B; + } + else { + make_orthonormals(D - dIdx, &T, &B); + *domega_in_dx = sin_theta * cos_phi * T + sin_theta * sin_phi * B + cos_theta * (D - dIdx) - + dir; + make_orthonormals(D - dIdy, &T, &B); + *domega_in_dy = sin_theta * cos_phi * T + sin_theta * sin_phi * B + cos_theta * (D - dIdy) - + dir; + } + } +#endif + return dir; } @@ -138,15 +166,10 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float g = volume->g; /* note that I points towards the viewer and so is used negated */ - *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf); + *omega_in = henyey_greenstrein_sample( + -I, g, randu, randv, pdf, dIdx, dIdy, domega_in_dx, domega_in_dy); *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */ -#ifdef __RAY_DIFFERENTIALS__ - /* todo: implement ray differential estimation */ - *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f); - *domega_in_dy = 
make_float3(0.0f, 0.0f, 0.0f); -#endif - return LABEL_VOLUME_SCATTER; } diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index e25bf5b4660..6258948cffa 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -778,6 +778,10 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg, sd->dPdu = dPdu; sd->dPdv = cross(dPdu, sd->Ng); # endif +# ifdef __DNDU__ + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); +# endif if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h index 7a91f8041f7..258e72ed674 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h @@ -85,6 +85,7 @@ ccl_device_noinline void motion_triangle_shader_setup( sd->dPdu = (verts[0] - verts[2]); sd->dPdv = (verts[1] - verts[2]); #endif + /* Compute smooth normal. */ if (sd->shader & SHADER_SMOOTH_NORMAL) { /* Find attribute. 
*/ diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 1e7fbd9c7fb..77ce0227ec8 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -139,6 +139,22 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, *dPdv = (p1 - p2); } +ccl_device_inline void triangle_dNdudv(KernelGlobals *kg, + int prim, + ccl_addr_space float3 *dNdu, + ccl_addr_space float3 *dNdv) +{ + /* load the three vertex normals of the triangle */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + + /* derivatives of the interpolated normal w.r.t. u and v (differences to the third vertex normal) */ + *dNdu = (n0 - n2); + *dNdv = (n1 - n2); +} + /* Reading attributes on various triangle elements */ ccl_device float triangle_attribute_float( diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 7da890b908d..807e5c050f8 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -283,6 +283,7 @@ ccl_device void kernel_bake_evaluate( P, Ng, Ng, + NULL, shader, object, prim, diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h index 3ec0cdbaccc..33bf019ac84 100644 --- a/intern/cycles/kernel/kernel_differential.h +++ b/intern/cycles/kernel/kernel_differential.h @@ -36,6 +36,23 @@ ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, dP_->dy = tmpy - dot(tmpy, Ng) * tmp; } +ccl_device void differential_reflect(differential3 *dD_, + float3 D, + const ccl_addr_space differential3 *dD, + float3 N, + const differential3 *dN) +{ + /* differential of the mirrored direction D - 2 * dot(D, N) * N, + * computed from the differentials of D (dD) and of N (dN) */ + + const float dotDN = dot(D, N); + const float3
tmpx = N * (dot(dD->dx, N) + dot(D, dN->dx)); + const float3 tmpy = N * (dot(dD->dy, N) + dot(D, dN->dy)); + + dD_->dx = dD->dx - 2.0f * (dotDN * dN->dx + tmpx); + dD_->dy = dD->dy - 2.0f * (dotDN * dN->dy + tmpy); +} + ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD) { /* compute dIdx/dy at a shading point, we just need to negate the diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index aebf2ec8e28..b8322e20a3c 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -57,6 +57,7 @@ ccl_device_noinline_cpu float3 direct_emissive_eval(KernelGlobals *kg, ls->P, ls->Ng, I, + &dI, ls->shader, ls->object, ls->prim, @@ -111,10 +112,21 @@ ccl_device_noinline_cpu bool direct_emission(KernelGlobals *kg, if (ls->pdf == 0.0f) return false; - /* todo: implement */ - differential3 dD = differential3_zero(); + differential3 dD; + differential3 dN; +#ifdef __DNDU__ + dN.dx = sd->dNdx; + dN.dy = sd->dNdy; +#else + dN = differential3_zero(); +#endif + /* This is how differentials are calculated for a perfect specular reflection. + * This is not the exact value that we should be getting here, + * but it's still better than using zero differentials. 
*/ + differential_reflect(&dD, sd->I, &sd->dI, sd->N, &dN); /* evaluate closure */ + emission_sd->dP = sd->dP; float3 light_eval = direct_emissive_eval( kg, emission_sd, ls, state, -ls->D, dD, ls->t, sd->time); diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 70aed6d54ed..383464dbfd9 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -44,6 +44,9 @@ struct OSLGlobals; struct OSLThreadData; struct OSLShadingSystem; # endif +# ifdef __OIIO__ +struct OIIOGlobals; +# endif typedef unordered_map<float, float> CoverageMap; @@ -64,6 +67,11 @@ typedef struct KernelGlobals { OSLThreadData *osl_tdata; # endif +# ifdef __OIIO__ + OIIOGlobals *oiio; + void *oiio_tdata; +# endif + /* **** Run-time data **** */ /* Heap-allocated storage for transparent shadows intersections. */ diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 42a834d2ce3..d9a06aa5389 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -333,6 +333,33 @@ ccl_device bool lamp_light_eval( return true; } +ccl_device void lamp_light_dPdudv(KernelGlobals *kg, + int lamp, + float u, + float v, + ccl_addr_space float3 *dPdu, + ccl_addr_space float3 *dPdv) +{ + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + LightType type = (LightType)klight->type; + + switch (type) { + case LIGHT_AREA: { + *dPdu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + *dPdv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + break; + } + case LIGHT_POINT: + case LIGHT_DISTANT: + case LIGHT_SPOT: + default: + // TODO (Stefan) + *dPdu = make_float3(0.0f, 0.0f, 0.0f); + *dPdv = make_float3(0.0f, 0.0f, 0.0f); + break; + } +} + /* Triangle Light */ /* returns true if the triangle is has motion blur or an instancing transform applied */ diff --git 
a/intern/cycles/kernel/kernel_oiio_globals.h b/intern/cycles/kernel/kernel_oiio_globals.h new file mode 100644 index 00000000000..62a347fea7a --- /dev/null +++ b/intern/cycles/kernel/kernel_oiio_globals.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KERNEL_OIIO_GLOBALS_H__ +#define __KERNEL_OIIO_GLOBALS_H__ + +#include "util/util_thread.h" +#include "util/util_vector.h" +#include <OpenImageIO/texture.h> + +CCL_NAMESPACE_BEGIN + +struct OIIOTexture { + OIIO::TextureSystem::TextureHandle *handle; + OIIO::TextureOpt::InterpMode interpolation; + OIIO::TextureOpt::Wrap extension; + bool is_linear; +}; + +struct OIIOGlobals { + OIIO::TextureSystem *tex_sys; + float diffuse_blur; + float glossy_blur; +}; + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 92a097de9e1..690f5cd86ef 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -30,8 +30,8 @@ #include "kernel/kernel_write_passes.h" #include "kernel/kernel_accumulate.h" -#include "kernel/kernel_shader.h" #include "kernel/kernel_light.h" +#include "kernel/kernel_shader.h" #include "kernel/kernel_adaptive_sampling.h" #include "kernel/kernel_passes.h" @@ -354,8 +354,14 @@ ccl_device_noinline light_ray.D = ao_D; light_ray.t = kernel_data.background.ao_distance; light_ray.time = sd->time; +#ifdef __RAY_DIFFERENTIALS__ 
light_ray.dP = sd->dP; - light_ray.dD = differential3_zero(); + /* pbrt v3 style offset directions for diffuse bounces, stored as differences from ao_D */ + float3 a, b; + make_orthonormals(ao_D, &a, &b); + light_ray.dD.dx = normalize(ao_D + 0.1f * a) - ao_D; + light_ray.dD.dy = normalize(ao_D + 0.1f * b) - ao_D; +#endif /* __RAY_DIFFERENTIALS__ */ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { path_radiance_accum_ao(kg, L, state, throughput, ao_alpha, ao_bsdf, ao_shadow); diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index a1ee1bc107e..ec02e3db901 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -50,8 +50,14 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, light_ray.D = ao_D; light_ray.t = kernel_data.background.ao_distance; light_ray.time = sd->time; +# ifdef __RAY_DIFFERENTIALS__ light_ray.dP = sd->dP; - light_ray.dD = differential3_zero(); + /* pbrt v3 style offset directions for diffuse bounces, stored as differences from ao_D */ + float3 a, b; + make_orthonormals(ao_D, &a, &b); + light_ray.dD.dx = normalize(ao_D + 0.1f * a) - ao_D; + light_ray.dD.dy = normalize(ao_D + 0.1f * b) - ao_D; +# endif /* __RAY_DIFFERENTIALS__ */ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { path_radiance_accum_ao( diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 7f02e6fc7b3..088e63d91ea 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -146,6 +146,34 @@ ccl_device_noinline differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t); differential_incoming(&sd->dI, ray->dD); differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); +# ifdef __DNDU__ + if (sd->shader & SHADER_SMOOTH_NORMAL && sd->type & PRIMITIVE_TRIANGLE) { + // TODO stefan curves + /* dNdu/dNdv */ + float3 dNdu, dNdv; + triangle_dNdudv(kg, sd->prim, &dNdu, &dNdv); +
sd->dNdx = dNdu * sd->du.dx + dNdv * sd->dv.dx; + sd->dNdy = dNdu * sd->du.dy + dNdv * sd->dv.dy; + + /* backfacing test */ + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + if (backfacing) { + sd->dNdx = -sd->dNdx; + sd->dNdy = -sd->dNdy; + } +# ifdef __INSTANCING__ + if (isect->object != OBJECT_NONE) { + /* instance transform */ + object_dir_transform_auto(kg, sd, &sd->dNdx); + object_dir_transform_auto(kg, sd, &sd->dNdy); + } +# endif /* __INSTANCING__ */ + } + else { + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); + } +# endif /* __DNDU__ */ #endif PROFILING_SHADER(sd->shader); @@ -179,6 +207,11 @@ ccl_device_inline sd->u = isect->u; sd->v = isect->v; +# ifdef __DNDU__ + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); +# endif + /* fetch triangle data */ if (sd->type == PRIMITIVE_TRIANGLE) { float3 Ng = triangle_normal(kg, sd); @@ -196,6 +229,15 @@ ccl_device_inline /* dPdu/dPdv */ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); # endif +# ifdef __DNDU__ + /* dNdu/dNdv */ + if (sd->shader & SHADER_SMOOTH_NORMAL && sd->type & PRIMITIVE_TRIANGLE) { + float3 dNdu, dNdv; + triangle_dNdudv(kg, sd->prim, &dNdu, &dNdv); + sd->dNdx = dNdu * sd->du.dx + dNdv * sd->dv.dx; + sd->dNdy = dNdu * sd->du.dy + dNdv * sd->dv.dy; + } +# endif } else { /* motion triangle */ @@ -212,6 +254,10 @@ ccl_device_inline object_dir_transform_auto(kg, sd, &sd->dPdu); object_dir_transform_auto(kg, sd, &sd->dPdv); # endif +# ifdef __DNDU__ + object_dir_transform(kg, sd, &sd->dNdx); + object_dir_transform(kg, sd, &sd->dNdy); +# endif } /* backfacing test */ @@ -223,6 +269,10 @@ ccl_device_inline sd->dPdu = -sd->dPdu; sd->dPdv = -sd->dPdv; # endif +# ifdef __DNDU__ + sd->dNdx = -sd->dNdx; + sd->dNdy = -sd->dNdy; +# endif } /* should not get used in principle as the shading will only use a diffuse @@ -246,6 +296,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, const float3 P, const float3 
Ng, const float3 I, + const differential3 *dI, int shader, int object, int prim, @@ -327,12 +378,39 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, object_dir_transform_auto(kg, sd, &sd->dPdv); } #endif +#ifdef __DNDU__ + + float3 dNdu, dNdv; + triangle_dNdudv(kg, sd->prim, &dNdu, &dNdv); + sd->dNdx = dNdu * sd->du.dx + dNdv * sd->dv.dx; + sd->dNdy = dNdu * sd->du.dy + dNdv * sd->dv.dy; + +# ifdef __INSTANCING__ + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform_auto(kg, sd, &sd->dNdx); + object_normal_transform_auto(kg, sd, &sd->dNdy); + } +# endif +#endif + } + else if (sd->type & PRIMITIVE_LAMP) { +#ifdef __DPDU__ + lamp_light_dPdudv(kg, lamp, sd->u, sd->v, &sd->dPdu, &sd->dPdv); +#endif +#ifdef __DNDU__ + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); +#endif } else { #ifdef __DPDU__ sd->dPdu = zero_float3(); sd->dPdv = zero_float3(); #endif +#ifdef __DNDU__ + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); +#endif } /* backfacing test */ @@ -347,15 +425,26 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, sd->dPdu = -sd->dPdu; sd->dPdv = -sd->dPdv; #endif +#ifdef __DNDU__ + sd->dNdx = -sd->dNdx; /* flip both normal differentials along with the normal */ + sd->dNdy = -sd->dNdy; +#endif } } #ifdef __RAY_DIFFERENTIALS__ - /* no ray differentials here yet */ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); - sd->du = differential_zero(); - sd->dv = differential_zero(); + if (dI) { + sd->dI = *dI; + differential_transfer(&sd->dP, sd->dP, I, *dI, Ng, t); + differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); + } + else { + sd->dP = differential3_zero(); + sd->dI = differential3_zero(); + sd->du = differential_zero(); + sd->dv = differential_zero(); + } + #endif PROFILING_SHADER(sd->shader); @@ -371,25 +460,32 @@ ccl_device void shader_setup_from_displace( int shader; triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); +
triangle_dPdudv(kg, prim, &sd->dP.dx, &sd->dP.dy); /* force smooth shading for displacement */ shader |= SHADER_SMOOTH_NORMAL; +#if 0 + /* TODO Stefan - need differentials here that don't break the unfiltered case */ + I = -Ng; + differential3 dI = differential3_zero(); + + shader_setup_from_sample(kg, sd, + P, Ng, I, &dI, +#else + shader_setup_from_sample( kg, sd, P, Ng, I, - shader, - object, - prim, - u, - v, - 0.0f, - 0.5f, - !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), - LAMP_NONE); + NULL, +#endif + shader, object, prim, + u, v, 0.0f, 0.5f, + !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), + LAMP_NONE); } /* ShaderData setup from ray into background */ @@ -422,6 +518,10 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, sd->dPdu = zero_float3(); sd->dPdv = zero_float3(); #endif +#ifdef __DNDU__ + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); +#endif #ifdef __RAY_DIFFERENTIALS__ /* differentials */ @@ -469,11 +569,17 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s sd->dPdu = zero_float3(); sd->dPdv = zero_float3(); # endif +# ifdef __DNDU__ + /* dNdu/dNdv */ + sd->dNdx = make_float3(0.0f, 0.0f, 0.0f); + sd->dNdy = make_float3(0.0f, 0.0f, 0.0f); +# endif # ifdef __RAY_DIFFERENTIALS__ /* differentials */ - sd->dP = ray->dD; - differential_incoming(&sd->dI, sd->dP); + sd->dP.dx = ray->dP.dx + ray->t * ray->dD.dx; + sd->dP.dy = ray->dP.dy + ray->t * ray->dD.dy; + differential_incoming(&sd->dI, ray->dD); sd->du = differential_zero(); sd->dv = differential_zero(); # endif diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 677504a4045..93ca362202d 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -558,6 +558,7 @@ ccl_device_noinline /* Sample scattering direction. 
*/ float scatter_u, scatter_v; path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v); + float cos_theta; if (guided) { cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 7cbe18acf28..1519b2ace9d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -124,6 +124,8 @@ CCL_NAMESPACE_BEGIN # endif # define __VOLUME_DECOUPLED__ # define __VOLUME_RECORD_ALL__ +# define __DNDU__ +# define __OIIO__ #endif /* __KERNEL_CPU__ */ #ifdef __KERNEL_CUDA__ @@ -953,6 +955,11 @@ typedef ccl_addr_space struct ccl_align(16) ShaderData float3 dPdu; float3 dPdv; #endif +#ifdef __DNDU__ + /* differential of N w.r.t. x and y. */ + float3 dNdx; + float3 dNdy; +#endif #ifdef __OBJECT_MOTION__ /* object <-> world space transformations, cached to avoid diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp index 012daba62d8..a9300c6724f 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp @@ -34,6 +34,8 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#define OIIO_NO_AVX 1 + #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_avx #include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp index 16351a7f949..ee861852176 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp @@ -35,6 +35,8 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#define OIIO_NO_AVX 1 + #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_avx2 #include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index 5f6b6800363..b622ad80401 100644 --- 
a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -34,6 +34,8 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#define OIIO_NO_AVX 1 + #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx #include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index 97e8fc25140..1e1008bbc0a 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -35,6 +35,8 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#define OIIO_NO_AVX 1 + #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx2 #include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 59b96c86c50..073b6defa5c 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -23,6 +23,14 @@ # include <nanovdb/util/SampleFromVoxels.h> #endif +#ifdef __OIIO__ +# include "kernel/kernel_oiio_globals.h" +# define NEAREST_LOOKUP_PATHS \ + (PATH_RAY_DIFFUSE | PATH_RAY_SHADOW | PATH_RAY_DIFFUSE_ANCESTOR | PATH_RAY_VOLUME_SCATTER | \ + PATH_RAY_GLOSSY | PATH_RAY_EMISSION) +# define DIFFUSE_BLUR_PATHS (PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR) +#endif + CCL_NAMESPACE_BEGIN /* Make template functions private so symbols don't conflict between kernels with different @@ -584,32 +592,126 @@ template<typename T> struct NanoVDBInterpolator { #undef SET_CUBIC_SPLINE_WEIGHTS -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y, differential ds, differential dt, uint path_flag) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + float4 r = make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, 
TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); switch (info.data_type) { case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp(info, x, y); + r = TextureInterpolator<half>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp(info, x, y); + r = TextureInterpolator<uchar>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp(info, x, y); + r = TextureInterpolator<uint16_t>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp(info, x, y); + r = TextureInterpolator<float>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp(info, x, y); + r = TextureInterpolator<half4>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp(info, x, y); + r = TextureInterpolator<uchar4>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp(info, x, y); + r = TextureInterpolator<ushort4>::interp(info, x, y); + break; case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp(info, x, y); + r = TextureInterpolator<float4>::interp(info, x, y); + break; + case IMAGE_DATA_TYPE_OIIO: + { +#ifdef __OIIO__ + /* Make sure we have all necessary data in place, if not, bail. */ + kernel_assert(kg->oiio); + kernel_assert(kg->oiio->tex_sys); + kernel_assert(info.data); + if (!kg->oiio || !kg->oiio->tex_sys || !info.data) { + return r; + } + /* Options: Anisotropic is a quality/speed tradeoff. + * Interpolation and extensions are supported in OIIO under different constants. 
+ * */ + OIIO::TextureOpt options; + options.anisotropic = 8; + float missingcolor[4] = { + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A}; + options.missingcolor = missingcolor; + options.mipmode = OIIO::TextureOpt::MipModeAniso; + options.sblur = options.tblur = 0.0f; + switch (info.interpolation) { + case INTERPOLATION_SMART: + options.interpmode = OIIO::TextureOpt::InterpSmartBicubic; + break; + case INTERPOLATION_CUBIC: + options.interpmode = OIIO::TextureOpt::InterpBicubic; + break; + case INTERPOLATION_LINEAR: + options.interpmode = OIIO::TextureOpt::InterpBilinear; + break; + //case INTERPOLATION_NONE: + case INTERPOLATION_CLOSEST: + default: + options.interpmode = OIIO::TextureOpt::InterpClosest; + break; + } + switch (info.extension) { + case EXTENSION_CLIP: + options.swrap = options.twrap = OIIO::TextureOpt::WrapBlack; + break; + case EXTENSION_EXTEND: + options.swrap = options.twrap = OIIO::TextureOpt::WrapClamp; + break; + case EXTENSION_REPEAT: + default: + options.swrap = options.twrap = OIIO::TextureOpt::WrapPeriodic; + break; + } + + /* Texture lookup simplifications on less important paths. 
*/ + if (path_flag & NEAREST_LOOKUP_PATHS && !(path_flag & PATH_RAY_SINGULAR)) { + options.interpmode = OIIO::TextureOpt::InterpClosest; + options.mipmode = OIIO::TextureOpt::MipModeOneLevel; + } + else { + options.mipmode = OIIO::TextureOpt::MipModeAniso; + } + if (path_flag & DIFFUSE_BLUR_PATHS) { + options.sblur = options.tblur = kg->oiio->diffuse_blur; + } + else if (path_flag & PATH_RAY_GLOSSY) { + options.sblur = options.tblur = kg->oiio->glossy_blur; + } + else { + options.sblur = options.tblur = 0.0f; + } + + OIIO::TextureSystem::TextureHandle *handle = *((OIIO::TextureSystem::TextureHandle**)info.data); + kernel_assert(handle && kg->oiio->tex_sys->good(handle)); + if (!handle || !kg->oiio->tex_sys->good(handle)) { /* null or invalid handle: return the missing-texture color */ + return r; + } + kg->oiio->tex_sys->texture(handle, + (OIIO::TextureSystem::Perthread *)kg->oiio_tdata, + options, + x, + y, + ds.dx, /* OIIO derivative order: dsdx, dtdx, dsdy, dtdy */ + dt.dx, + ds.dy, + dt.dy, + 4, + (float *)&r); +#endif + break; + } default: assert(0); - return make_float4( - TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); } + return info.compress_as_srgb ?
color_srgb_to_linear_v4(r) : r; } ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, @@ -646,6 +748,8 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, case IMAGE_DATA_TYPE_NANOVDB_FLOAT3: return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp); #endif + case IMAGE_DATA_TYPE_OIIO: + return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); default: assert(0); return make_float4( diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp index 40e485d27c0..a8b55d4e365 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -36,6 +36,8 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#define OIIO_NO_AVX 1 + #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx #include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp index 8c44238470e..e7f6714e761 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -37,6 +37,8 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#define OIIO_NO_AVX 1 + #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx2 #include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 132653fa7ca..bf0c863bc41 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -178,7 +178,7 @@ ccl_device_inline T kernel_tex_image_interp_nanovdb( } #endif -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y, 
differential, differential, uint) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index bb6b8a40e8e..8940258535b 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -123,6 +123,10 @@ ccl_device_inline float4 svm_image_texture_read( return make_float4(r.x, r.y, r.z, r.w); } #endif + /* Unsupported. */ + else if (texture_type == IMAGE_DATA_TYPE_OIIO) { + return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } /* Byte */ else { uchar r = tex_fetch(uchar, info, data_offset); @@ -199,7 +203,7 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix) } \ (void)0 -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) +ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y, differential, differential, uint) { const ccl_global TextureInfo *info = kernel_tex_info(kg, id); diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 2b7c21d0bc4..938ab74ed15 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -56,6 +56,7 @@ #include "kernel/kernel_projection.h" #include "kernel/kernel_accumulate.h" +#include "kernel/kernel_light.h" #include "kernel/kernel_shader.h" // clang-format on @@ -1248,7 +1249,12 @@ bool OSLRenderServices::texture(ustring filename, } case OSLTextureHandle::SVM: { /* Packed texture. 
*/ - float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t); + differential ds, dt; + ds.dx = dsdx; + ds.dy = dsdy; + dt.dx = dtdx; + dt.dy = dtdy; + float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t, ds, dt, sg->raytype); result[0] = rgba[0]; if (nchannels > 1) @@ -1280,7 +1286,7 @@ bool OSLRenderServices::texture(ustring filename, texture_thread_info, options, s, - t, + 1.0f - t, dsdx, dtdx, dsdy, @@ -1294,7 +1300,7 @@ bool OSLRenderServices::texture(ustring filename, status = ts->texture(filename, options, s, - t, + 1.0f - t, dsdx, dtdx, dsdy, diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl index d04743eb368..d8b711b9c7f 100644 --- a/intern/cycles/kernel/shaders/node_environment_texture.osl +++ b/intern/cycles/kernel/shaders/node_environment_texture.osl @@ -47,7 +47,6 @@ shader node_environment_texture( string filename = "", string projection = "equirectangular", string interpolation = "linear", - int compress_as_srgb = 0, int ignore_alpha = 0, int unassociate_alpha = 0, int is_float = 1, @@ -79,7 +78,4 @@ shader node_environment_texture( if (!is_float) Color = min(Color, 1.0); } - - if (compress_as_srgb) - Color = color_srgb_to_scene_linear(Color); } diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl index 9e2ef84c872..cb1c6caace2 100644 --- a/intern/cycles/kernel/shaders/node_image_texture.osl +++ b/intern/cycles/kernel/shaders/node_image_texture.osl @@ -59,7 +59,6 @@ color image_texture_lookup(string filename, float u, float v, output float Alpha, - int compress_as_srgb, int ignore_alpha, int unassociate_alpha, int is_float, @@ -89,10 +88,6 @@ color image_texture_lookup(string filename, rgb = min(rgb, 1.0); } - if (compress_as_srgb) { - rgb = color_srgb_to_scene_linear(rgb); - } - return rgb; } @@ -104,7 +99,6 @@ shader node_image_texture(int 
use_mapping = 0, string interpolation = "smartcubic", string extension = "periodic", float projection_blend = 0.0, - int compress_as_srgb = 0, int ignore_alpha = 0, int unassociate_alpha = 0, int is_tiled = 0, @@ -122,7 +116,6 @@ shader node_image_texture(int use_mapping = 0, p[0], p[1], Alpha, - compress_as_srgb, ignore_alpha, unassociate_alpha, is_float, @@ -201,7 +194,6 @@ shader node_image_texture(int use_mapping = 0, p[1], p[2], tmp_alpha, - compress_as_srgb, ignore_alpha, unassociate_alpha, is_float, @@ -215,7 +207,6 @@ shader node_image_texture(int use_mapping = 0, p[0], p[2], tmp_alpha, - compress_as_srgb, ignore_alpha, unassociate_alpha, is_float, @@ -229,7 +220,6 @@ shader node_image_texture(int use_mapping = 0, p[1], p[0], tmp_alpha, - compress_as_srgb, ignore_alpha, unassociate_alpha, is_float, @@ -245,7 +235,6 @@ shader node_image_texture(int use_mapping = 0, projected[0], projected[1], Alpha, - compress_as_srgb, ignore_alpha, unassociate_alpha, is_float, @@ -259,7 +248,6 @@ shader node_image_texture(int use_mapping = 0, projected[0], projected[1], Alpha, - compress_as_srgb, ignore_alpha, unassociate_alpha, is_float, diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 000da1fa615..8acb6edbb9b 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -327,10 +327,10 @@ ccl_device_noinline void svm_eval_nodes( break; # endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ case NODE_TEX_IMAGE: - svm_node_tex_image(kg, sd, stack, node, &offset); + svm_node_tex_image(kg, sd, path_flag, stack, node, &offset); break; case NODE_TEX_IMAGE_BOX: - svm_node_tex_image_box(kg, sd, stack, node); + svm_node_tex_image_box(kg, sd, path_flag, stack, node); break; case NODE_TEX_NOISE: svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, &offset); @@ -437,7 +437,7 @@ ccl_device_noinline void svm_eval_nodes( svm_node_camera(kg, sd, stack, node.y, node.z, node.w); break; case NODE_TEX_ENVIRONMENT: - 
svm_node_tex_environment(kg, sd, stack, node); + svm_node_tex_environment(kg, sd, path_flag, stack, node); break; case NODE_TEX_SKY: svm_node_tex_sky(kg, sd, stack, node, &offset); diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 9348ddabde5..c6005f93c14 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,14 +16,21 @@ CCL_NAMESPACE_BEGIN -ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags) +ccl_device float4 svm_image_texture(KernelGlobals *kg, + int id, + float x, + float y, + differential ds, + differential dt, + uint flags, + int path_flag) { if (id == -1) { return make_float4( TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); } - float4 r = kernel_tex_image_interp(kg, id, x, y); + float4 r = kernel_tex_image_interp(kg, id, x, y, ds, dt, path_flag); const float alpha = r.w; if ((flags & NODE_IMAGE_ALPHA_UNASSOCIATE) && alpha != 1.0f && alpha != 0.0f) { @@ -31,10 +38,6 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, r.w = alpha; } - if (flags & NODE_IMAGE_COMPRESS_AS_SRGB) { - r = color_srgb_to_linear_v4(r); - } - return r; } @@ -45,19 +48,21 @@ ccl_device_inline float3 texco_remap_square(float3 co) } ccl_device void svm_node_tex_image( - KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) + KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) { uint co_offset, out_offset, alpha_offset, flags; + uint projection, dx_offset, dy_offset, unused; svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + svm_unpack_node_uchar4(node.w, &projection, &dx_offset, &dy_offset, &unused); float3 co = stack_load_float3(stack, co_offset); float2 tex_co; - if (node.w == NODE_IMAGE_PROJ_SPHERE) { + if (projection == NODE_IMAGE_PROJ_SPHERE) { co = texco_remap_square(co); tex_co = map_to_sphere(co); 
} - else if (node.w == NODE_IMAGE_PROJ_TUBE) { + else if (projection == NODE_IMAGE_PROJ_TUBE) { co = texco_remap_square(co); tex_co = map_to_tube(co); } @@ -65,6 +70,40 @@ ccl_device void svm_node_tex_image( tex_co = make_float2(co.x, co.y); } + differential ds, dt; +#ifdef __KERNEL_CPU__ + if (stack_valid(dx_offset) && stack_valid(dy_offset)) { + float3 dx = stack_load_float3(stack, dx_offset); + float3 dy = stack_load_float3(stack, dy_offset); + float2 tex_co_dx, tex_co_dy; + if (projection == NODE_IMAGE_PROJ_SPHERE) { + dx = texco_remap_square(dx); + tex_co_dx = map_to_sphere(dx); + dy = texco_remap_square(dy); + tex_co_dy = map_to_sphere(dy); + } + else if (projection == NODE_IMAGE_PROJ_TUBE) { + dx = texco_remap_square(dx); + tex_co_dx = map_to_tube(dx); + dy = texco_remap_square(dy); + tex_co_dy = map_to_tube(dy); + } + else { + tex_co_dx = make_float2(dx.x, dx.y); + tex_co_dy = make_float2(dy.x, dy.y); + } + ds.dx = tex_co_dx.x - tex_co.x; + ds.dy = tex_co_dy.x - tex_co.x; + dt.dx = tex_co_dx.y - tex_co.y; + dt.dy = tex_co_dy.y - tex_co.y; + } + else +#endif + { + ds = differential_zero(); + dt = differential_zero(); + } + /* TODO(lukas): Consider moving tile information out of the SVM node. * TextureInfo seems a reasonable candidate. 
*/ int id = -1; @@ -108,7 +147,7 @@ ccl_device void svm_node_tex_image( id = -num_nodes; } - float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, flags); + float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, ds, dt, flags, path_flag); if (stack_valid(out_offset)) stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); @@ -116,7 +155,8 @@ ccl_device void svm_node_tex_image( stack_store_float(stack, alpha_offset, f.w); } -ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device void svm_node_tex_image_box( + KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node) { /* get object space normal */ float3 N = sd->N; @@ -144,7 +184,9 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float * 7 zones, with an if() test for each zone */ float3 weight = make_float3(0.0f, 0.0f, 0.0f); - float blend = __int_as_float(node.w); + uint blend_hi, blend_lo, dx_offset, dy_offset; + svm_unpack_node_uchar4(node.w, &blend_hi, &blend_lo, &dx_offset, &dy_offset); + float blend = ((blend_hi << 8) + blend_lo) / 65536.0f; float limit = 0.5f * (1.0f + blend); /* first test for corners with single texture */ @@ -195,30 +237,49 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float3 co_dx = make_float3(0.0f, 0.0f, 0.0f); + float3 co_dy = make_float3(0.0f, 0.0f, 0.0f); + differential ds, dt; +#ifdef __KERNEL_CPU__ + if (stack_valid(dx_offset) && stack_valid(dy_offset)) { + co_dx = co - stack_load_float3(stack, dx_offset); + co_dy = co - stack_load_float3(stack, dy_offset); + } +#endif + /* Map so that no textures are flipped, rotation is somewhat arbitrary. */ if (weight.x > 0.0f) { float2 uv = make_float2((signed_N.x < 0.0f) ? 
1.0f - co.y : co.y, co.z); - f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, flags); + ds.dx = co_dx.y; + ds.dy = co_dy.y; + dt.dx = co_dx.z; + dt.dy = co_dy.z; + f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, ds, dt, flags, path_flag); } if (weight.y > 0.0f) { - float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z); - f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, flags); + float2 uv = make_float2((signed_N.y < 0.0f) ? 1.0f - co.x : co.x, co.z); + ds.dx = co_dx.x; + ds.dy = co_dy.x; + dt.dx = co_dx.z; + dt.dy = co_dy.z; + f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, ds, dt, flags, path_flag); } if (weight.z > 0.0f) { - float2 uv = make_float2((signed_N.z > 0.0f) ? 1.0f - co.y : co.y, co.x); - f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, flags); + float2 uv = make_float2((signed_N.z < 0.0f) ? 1.0f - co.y : co.y, co.x); + ds.dx = co_dx.y; + ds.dy = co_dy.y; + dt.dx = co_dx.x; + dt.dy = co_dy.x; + f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, ds, dt, flags, path_flag); } - if (stack_valid(out_offset)) stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); if (stack_valid(alpha_offset)) stack_store_float(stack, alpha_offset, f.w); } -ccl_device void svm_node_tex_environment(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) +ccl_device void svm_node_tex_environment( + KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node) { uint id = node.y; uint co_offset, out_offset, alpha_offset, flags; @@ -236,7 +297,8 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg, else uv = direction_to_mirrorball(co); - float4 f = svm_image_texture(kg, id, uv.x, uv.y, flags); + float4 f = svm_image_texture( + kg, id, uv.x, uv.y, differential_zero(), differential_zero(), flags, path_flag); if (stack_valid(out_offset)) stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h 
index b908732f026..47b18db1df0 100644 --- a/intern/cycles/kernel/svm/svm_sky.h +++ b/intern/cycles/kernel/svm/svm_sky.h @@ -141,6 +141,8 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals *kg, float3 xyz; /* convert dir to spherical coordinates */ float2 direction = direction_to_spherical(dir); + differential ds, dt; + ds.dx = ds.dy = dt.dx = dt.dy = 0.0f; /* render above the horizon */ if (dir.z >= 0.0f) { @@ -184,7 +186,7 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals *kg, if (x > 1.0f) { x -= 1.0f; } - xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, y)); + xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, y, ds, dt, 0)); } } /* ground */ @@ -201,7 +203,7 @@ ccl_device float3 sky_radiance_nishita(KernelGlobals *kg, if (x > 1.0f) { x -= 1.0f; } - xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, -0.5)) * fade; + xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, -0.5f, ds, dt, 0)) * fade; } } diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index fec6a2cc27f..f14f358fed8 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -121,6 +121,9 @@ ccl_device void svm_node_tex_coord_bump_dx( } case NODE_TEXCO_NORMAL: { data = sd->N; +# ifdef __DNDU__ + data = sd->N + sd->dNdx; +# endif object_inverse_normal_transform(kg, sd, &data); break; } @@ -201,6 +204,9 @@ ccl_device void svm_node_tex_coord_bump_dy( } case NODE_TEXCO_NORMAL: { data = sd->N; +# ifdef __DNDU__ + data = sd->N + sd->dNdy; +# endif object_inverse_normal_transform(kg, sd, &data); break; } diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp index 7ec1d2d9abb..b01b1fe5a77 100644 --- a/intern/cycles/render/geometry.cpp +++ b/intern/cycles/render/geometry.cpp @@ -1739,7 +1739,7 @@ void GeometryManager::device_update_displacement_images(Device *device, ImageSlotTextureNode *image_node = 
static_cast<ImageSlotTextureNode *>(node); for (int i = 0; i < image_node->handle.num_tiles(); i++) { - const int slot = image_node->handle.svm_slot(i); + const int slot = image_node->handle.svm_slot(scene->shader_manager->use_osl(), i); if (slot != -1) { bump_images.insert(slot); } @@ -1748,10 +1748,12 @@ void GeometryManager::device_update_displacement_images(Device *device, } } } + /* foreach (int slot, bump_images) { pool.push(function_bind( &ImageManager::device_update_slot, image_manager, device, scene, slot, &progress)); - } + }*/ + image_manager->device_update(device, scene, progress); pool.wait_work(); } diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index e9da48b624d..9c356656f81 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -383,6 +383,10 @@ void ShaderGraph::finalize(Scene *scene, bool do_bump, bool do_simplify, bool bu if (do_bump) bump_from_displacement(bump_in_object_space); + /* This must be after all bump nodes are created, + * so that bump map lookups can be mip mapped too. 
*/ + add_differentials(); + ShaderInput *surface_in = output()->input("Surface"); ShaderInput *volume_in = output()->input("Volume"); @@ -954,6 +958,91 @@ void ShaderGraph::refine_bump_nodes() } } +void ShaderGraph::add_differentials() +{ + /* we traverse the node graph looking for texture nodes, when we find them, + * we copy the sub-graph defined from "Vector" + * input to the inputs "Vector_dx" and "Vector_dy" */ + + foreach (ShaderNode *node, nodes) { + if (node->special_type == SHADER_SPECIAL_TYPE_IMAGE_SLOT && node->input("Vector")->link && + node->input("Vector_dx") && node->input("Vector_dy")) { + ShaderInput *vector_input = node->input("Vector"); + ShaderNodeSet nodes_vector; + + /* make 2 extra copies of the subgraph defined in Vector input */ + ShaderNodeMap nodes_dx; + ShaderNodeMap nodes_dy; + + /* find dependencies for the given input */ + find_dependencies(nodes_vector, vector_input); + + copy_nodes(nodes_vector, nodes_dx); + copy_nodes(nodes_vector, nodes_dy); + + /* First: Nodes that have no bump are set to center, others are left untouched. */ + foreach (ShaderNode *node, nodes_vector) + node->bump = node->bump == SHADER_BUMP_NONE ? SHADER_BUMP_CENTER : node->bump; + + /* Second: Nodes that have no bump are set DX, others are shifted by one. */ + foreach (NodePair &pair, nodes_dx) { + switch (pair.second->bump) { + case SHADER_BUMP_DX: + pair.second->bump = SHADER_BUMP_DY; + break; + case SHADER_BUMP_DY: + pair.second->bump = SHADER_BUMP_CENTER; + break; + default: + pair.second->bump = SHADER_BUMP_DX; + } + } + + /* Third: Nodes that have no bump are set DY, others are shifted by two. 
*/ + foreach (NodePair &pair, nodes_dy) { + switch (pair.second->bump) { + case SHADER_BUMP_DX: + pair.second->bump = SHADER_BUMP_CENTER; + break; + case SHADER_BUMP_DY: + pair.second->bump = SHADER_BUMP_DX; + break; + default: + pair.second->bump = SHADER_BUMP_DY; + } + } + + ShaderOutput *out = vector_input->link; + ShaderOutput *out_dx = nodes_dx[out->parent]->output(out->name()); + ShaderOutput *out_dy = nodes_dy[out->parent]->output(out->name()); + + /* Insert mapping nodes that are duplicates of what's inside the image node. + * This is somewhat wasteful, it would be better to have a MappingNode + * that does three transforms at a time. */ + MappingNode *mapping1 = create_node<MappingNode>(); + MappingNode *mapping2 = create_node<MappingNode>(); + mapping1->set_location(((ImageTextureNode *)node)->tex_mapping.translation); + mapping1->set_rotation(((ImageTextureNode *)node)->tex_mapping.rotation); + mapping1->set_scale(((ImageTextureNode *)node)->tex_mapping.scale); + mapping2->set_location(((ImageTextureNode *)node)->tex_mapping.translation); + mapping2->set_rotation(((ImageTextureNode *)node)->tex_mapping.rotation); + mapping2->set_scale(((ImageTextureNode *)node)->tex_mapping.scale); + add(mapping1); + add(mapping2); + connect(out_dx, mapping1->input("Vector")); + connect(out_dy, mapping2->input("Vector")); + connect(mapping1->output("Vector"), node->input("Vector_dx")); + connect(mapping2->output("Vector"), node->input("Vector_dy")); + + /* add generated nodes */ + foreach (NodePair &pair, nodes_dx) + add(pair.second); + foreach (NodePair &pair, nodes_dy) + add(pair.second); + } + } +} + void ShaderGraph::bump_from_displacement(bool use_object_space) { /* generate bump mapping automatically from displacement. 
bump mapping is diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index 5102b182593..b39dc9c8b71 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -381,6 +381,7 @@ class ShaderGraph : public NodeOwner { void break_cycles(ShaderNode *node, vector<bool> &visited, vector<bool> &on_stack); void bump_from_displacement(bool use_object_space); void refine_bump_nodes(); + void add_differentials(); void expand(); void default_inputs(bool do_osl); void transform_multi_closure(ShaderNode *node, ShaderOutput *weight_out, bool volume); diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 27f9b7df1dd..fe74ad1f7aa 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -36,6 +36,8 @@ # include <OSL/oslexec.h> #endif +#include "kernel/kernel_oiio_globals.h" + CCL_NAMESPACE_BEGIN namespace { @@ -77,6 +79,8 @@ const char *name_from_type(ImageDataType type) return "nanovdb_float"; case IMAGE_DATA_TYPE_NANOVDB_FLOAT3: return "nanovdb_float3"; + case IMAGE_DATA_TYPE_OIIO: + return "openimageio"; case IMAGE_DATA_NUM_TYPES: assert(!"System enumerator type, should never be used"); return ""; @@ -151,13 +155,13 @@ ImageMetaData ImageHandle::metadata() return img->metadata; } -int ImageHandle::svm_slot(const int tile_index) const +int ImageHandle::svm_slot(bool osl, const int tile_index) const { if (tile_index >= tile_slots.size()) { return -1; } - if (manager->osl_texture_system) { + if (osl) { ImageManager::Image *img = manager->images[tile_slots[tile_index]]; if (!img->loader->osl_filepath().empty()) { return -1; @@ -239,6 +243,11 @@ bool ImageMetaData::is_float() const void ImageMetaData::detect_colorspace() { + if (type == IMAGE_DATA_TYPE_OIIO) { + compress_as_srgb = false; + return; + } + /* Convert used specified color spaces to one we know how to handle. 
*/ colorspace = ColorSpaceManager::detect_known_colorspace( colorspace, colorspace_file_format, is_float()); @@ -299,12 +308,13 @@ bool ImageLoader::is_vdb_loader() const ImageManager::ImageManager(const DeviceInfo &info) { need_update_ = true; - osl_texture_system = NULL; + oiio_texture_system = NULL; animation_frame = 0; /* Set image limits */ features.has_half_float = info.has_half_images; features.has_nanovdb = info.has_nanovdb; + features.has_texture_cache = false; } ImageManager::~ImageManager() @@ -313,9 +323,10 @@ ImageManager::~ImageManager() assert(!images[slot]); } -void ImageManager::set_osl_texture_system(void *texture_system) +void ImageManager::set_oiio_texture_system(void *texture_system) { - osl_texture_system = texture_system; + oiio_texture_system = texture_system; + features.has_texture_cache = texture_system != NULL; } bool ImageManager::set_animation_frame_update(int frame) @@ -394,6 +405,28 @@ ImageHandle ImageManager::add_image(const string &filename, return handle; } +const string ImageManager::get_mip_map_path(const string &filename) +{ + if (!path_exists(filename)) { + return ""; + } + + string::size_type idx = filename.rfind('.'); + if (idx != string::npos) { + std::string extension = filename.substr(idx + 1); + if (extension == "tx") { + return filename; + } + } + + string tx_name = filename.substr(0, idx) + ".tx"; + if (path_exists(tx_name)) { + return tx_name; + } + + return ""; +} + ImageHandle ImageManager::add_image(ImageLoader *loader, const ImageParams &params, const bool builtin) @@ -440,7 +473,7 @@ int ImageManager::add_image_slot(ImageLoader *loader, img->params = params; img->loader = loader; img->need_metadata = true; - img->need_load = !(osl_texture_system && !img->loader->osl_filepath().empty()); + img->need_load = !(oiio_texture_system && !img->loader->osl_filepath().empty()); img->builtin = builtin; img->users = 1; img->mem = NULL; @@ -650,6 +683,23 @@ void ImageManager::device_load_image(Device *device, Scene *scene, int 
slot, Pro Image *img = images[slot]; + if (features.has_texture_cache && !img->builtin) { + /* Get or generate a mip mapped tile image file. + * If we have a mip map, assume it's linear, not sRGB. */ + const char *cache_path = scene->params.texture.use_custom_cache_path ? + scene->params.texture.custom_cache_path.c_str() : + NULL; + bool have_mip = ((OIIOImageLoader *)img->loader) + ->get_tx(img->metadata.colorspace, + img->params.extension, + progress, + scene->params.texture.auto_convert, + cache_path); + if (have_mip) { + img->need_metadata = true; + } + } + progress->set_status("Updating Images", "Loading " + img->loader->name()); const int texture_limit = scene->params.texture_limit; @@ -671,6 +721,7 @@ void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Pro device, img->mem_name.c_str(), slot, type, img->params.interpolation, img->params.extension); img->mem->info.use_transform_3d = img->metadata.use_transform_3d; img->mem->info.transform_3d = img->metadata.transform_3d; + img->mem->info.compress_as_srgb = img->metadata.compress_as_srgb; /* Create new texture. */ if (type == IMAGE_DATA_TYPE_FLOAT4) { @@ -767,7 +818,17 @@ void ImageManager::device_load_image(Device *device, Scene *scene, int slot, Pro } } #endif + else if (type == IMAGE_DATA_TYPE_OIIO) { + thread_scoped_lock device_lock(device_mutex); + void *pixels = img->mem->alloc(1, 1); + if (pixels != NULL) { + OIIO::TextureSystem *tex_sys = (OIIO::TextureSystem *)oiio_texture_system; + OIIO::TextureSystem::TextureHandle *handle = tex_sys->get_texture_handle( + OIIO::ustring(img->loader->osl_filepath())); + *((OIIO::TextureSystem::TextureHandle **)pixels) = tex_sys->good(handle) ? 
handle : NULL; + } + } { thread_scoped_lock device_lock(device_mutex); img->mem->copy_to_device(); @@ -785,11 +846,11 @@ void ImageManager::device_free_image(Device *, int slot) return; } - if (osl_texture_system) { + if (oiio_texture_system) { #ifdef WITH_OSL ustring filepath = img->loader->osl_filepath(); if (!filepath.empty()) { - ((OSL::TextureSystem *)osl_texture_system)->invalidate(filepath); + ((OIIO::TextureSystem *)oiio_texture_system)->invalidate(filepath); } #endif } diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index dede9513d5f..dd6602a4eeb 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -102,6 +102,7 @@ class ImageDeviceFeatures { public: bool has_half_float; bool has_nanovdb; + bool has_texture_cache; }; /* Image loader base class, that can be subclassed to load image data @@ -157,7 +158,7 @@ class ImageHandle { int num_tiles(); ImageMetaData metadata(); - int svm_slot(const int tile_index = 0) const; + int svm_slot(bool osl = false, const int tile_index = 0) const; device_texture *image_memory(const int tile_index = 0) const; VDBImageLoader *vdb_loader(const int tile_index = 0) const; @@ -191,7 +192,9 @@ class ImageManager { void device_load_builtin(Device *device, Scene *scene, Progress &progress); void device_free_builtin(Device *device); - void set_osl_texture_system(void *texture_system); + void set_oiio_texture_system(void *texture_system); + const string get_mip_map_path(const string &filename); + void set_pack_images(bool pack_images_); bool set_animation_frame_update(int frame); void collect_statistics(RenderStats *stats); @@ -227,7 +230,7 @@ class ImageManager { int animation_frame; vector<Image *> images; - void *osl_texture_system; + void *oiio_texture_system; int add_image_slot(ImageLoader *loader, const ImageParams &params, const bool builtin); void add_image_user(int slot); diff --git a/intern/cycles/render/image_oiio.cpp b/intern/cycles/render/image_oiio.cpp index 
4867efe6ac0..05e6972f822 100644 --- a/intern/cycles/render/image_oiio.cpp +++ b/intern/cycles/render/image_oiio.cpp @@ -19,6 +19,9 @@ #include "util/util_image.h" #include "util/util_logging.h" #include "util/util_path.h" +#include "util/util_progress.h" + +#include <OpenImageIO/imagebufalgo.h> CCL_NAMESPACE_BEGIN @@ -62,7 +65,8 @@ bool OIIOImageLoader::load_metadata(const ImageDeviceFeatures &features, ImageMe size_t channel_size = spec.format.basesize(); bool is_float = false; - bool is_half = false; + bool is_half = spec.format == TypeDesc::HALF && features.has_half_float; + bool is_tiled = spec.tile_pixels() != 0; if (spec.format.is_floating_point()) { is_float = true; @@ -75,15 +79,13 @@ bool OIIOImageLoader::load_metadata(const ImageDeviceFeatures &features, ImageMe } } - /* check if it's half float */ - if (spec.format == TypeDesc::HALF && features.has_half_float) { - is_half = true; - } - /* set type and channels */ metadata.channels = spec.nchannels; - if (is_half) { + if (is_tiled && features.has_texture_cache) { + metadata.type = IMAGE_DATA_TYPE_OIIO; + } + else if (is_half) { metadata.type = (metadata.channels > 1) ? 
IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF; } else if (is_float) { @@ -211,6 +213,7 @@ bool OIIOImageLoader::load_pixels(const ImageMetaData &metadata, break; case IMAGE_DATA_TYPE_NANOVDB_FLOAT: case IMAGE_DATA_TYPE_NANOVDB_FLOAT3: + case IMAGE_DATA_TYPE_OIIO: case IMAGE_DATA_NUM_TYPES: break; } @@ -235,4 +238,85 @@ bool OIIOImageLoader::equals(const ImageLoader &other) const return filepath == other_loader.filepath; } +bool OIIOImageLoader::make_tx(const string &filename, + const string &outputfilename, + const ustring &colorspace, + ExtensionType extension) +{ + ImageSpec config; + config.attribute("maketx:filtername", "lanczos3"); + config.attribute("maketx:opaque_detect", 1); + config.attribute("maketx:highlightcomp", 1); + config.attribute("maketx:oiio_options", 1); + config.attribute("maketx:updatemode", 1); + + switch (extension) { + case EXTENSION_CLIP: + config.attribute("maketx:wrap", "black"); + break; + case EXTENSION_REPEAT: + config.attribute("maketx:wrap", "periodic"); + break; + case EXTENSION_EXTEND: + config.attribute("maketx:wrap", "clamp"); + break; + default: + assert(0); + break; + } + + /* Convert textures to linear color space before mip mapping. 
*/ + if (colorspace != u_colorspace_raw) { + if (colorspace == u_colorspace_srgb || colorspace.empty()) { + config.attribute("maketx:incolorspace", "sRGB"); + } + else { + config.attribute("maketx:incolorspace", colorspace.c_str()); + } + config.attribute("maketx:outcolorspace", "linear"); + } + + return ImageBufAlgo::make_texture(ImageBufAlgo::MakeTxTexture, filename, outputfilename, config); +} + +bool OIIOImageLoader::get_tx(const ustring &colorspace, + const ExtensionType &extension, + Progress *progress, + bool auto_convert, + const char *cache_path) +{ + if (!path_exists(osl_filepath().c_str())) { + return false; + } + + string::size_type idx = osl_filepath().rfind('.'); + if (idx != string::npos) { + string extension = osl_filepath().substr(idx + 1).c_str(); + if (extension == "tx") { + return true; + } + } + + string tx_name = string(osl_filepath().substr(0, idx).c_str()) + ".tx"; + if (cache_path) { + string filename = path_filename(tx_name); + tx_name = path_join(string(cache_path), filename); + } + if (path_exists(tx_name)) { + filepath = tx_name; + return true; + } + + if (auto_convert && progress) { + progress->set_status("Updating Images", string("Converting ") + osl_filepath().c_str()); + + bool ok = make_tx(osl_filepath().c_str(), tx_name, colorspace, extension); + if (ok) { + filepath = tx_name; + return true; + } + } + return false; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/image_oiio.h b/intern/cycles/render/image_oiio.h index a6dbb168b65..b2a8c5ebdad 100644 --- a/intern/cycles/render/image_oiio.h +++ b/intern/cycles/render/image_oiio.h @@ -39,6 +39,17 @@ class OIIOImageLoader : public ImageLoader { bool equals(const ImageLoader &other) const override; + bool get_tx(const ustring &colorspace, + const ExtensionType &extension, + Progress *progress, + bool auto_convert, + const char *cache_path); + + static bool make_tx(const string &filename, + const string &outputfilename, + const ustring &colorspace, + ExtensionType extension); + 
protected: ustring filepath; }; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 795166bcf4c..7337e19a929 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -250,7 +250,10 @@ NODE_DEFINE(ImageTextureNode) SOCKET_INT_ARRAY(tiles, "Tiles", array<int>()); SOCKET_BOOLEAN(animated, "Animated", false); + SOCKET_IN_POINT(vector, "Vector", zero_float3(), SocketType::LINK_TEXTURE_UV); + SOCKET_IN_POINT(vector_dx, "Vector_dx", zero_float3()); + SOCKET_IN_POINT(vector_dy, "Vector_dy", zero_float3()); SOCKET_OUT_COLOR(color, "Color"); SOCKET_OUT_FLOAT(alpha, "Alpha"); @@ -365,6 +368,8 @@ void ImageTextureNode::compile(SVMCompiler &compiler) ShaderInput *vector_in = input("Vector"); ShaderOutput *color_out = output("Color"); ShaderOutput *alpha_out = output("Alpha"); + ShaderInput *vector_dx = input("Vector_dx"); + ShaderInput *vector_dy = input("Vector_dy"); if (handle.empty()) { cull_tiles(compiler.scene, compiler.current_graph); @@ -403,22 +408,24 @@ void ImageTextureNode::compile(SVMCompiler &compiler) num_nodes = divide_up(handle.num_tiles(), 2); } - compiler.add_node(NODE_TEX_IMAGE, - num_nodes, - compiler.encode_uchar4(vector_offset, - compiler.stack_assign_if_linked(color_out), - compiler.stack_assign_if_linked(alpha_out), - flags), - projection); + compiler.add_node( + NODE_TEX_IMAGE, + num_nodes, + compiler.encode_uchar4(vector_offset, + compiler.stack_assign_if_linked(color_out), + compiler.stack_assign_if_linked(alpha_out), + flags), + compiler.encode_uchar4( + projection, compiler.stack_assign(vector_dx), compiler.stack_assign(vector_dy), 0)); if (num_nodes > 0) { for (int i = 0; i < num_nodes; i++) { int4 node; node.x = tiles[2 * i]; - node.y = handle.svm_slot(2 * i); + node.y = handle.svm_slot(false, 2 * i); if (2 * i + 1 < tiles.size()) { node.z = tiles[2 * i + 1]; - node.w = handle.svm_slot(2 * i + 1); + node.w = handle.svm_slot(false, 2 * i + 1); } else { node.z = -1; @@ -430,13 +437,19 @@ void 
ImageTextureNode::compile(SVMCompiler &compiler) } else { assert(handle.num_tiles() == 1); + /* Blend is a float between 0 and 1. Convert to 16 bit unsigned int to make room for vector_dx + * and vector_dy. */ + uint blend = clamp((uint)(projection_blend * 65535.0f), 0, 0xffff); compiler.add_node(NODE_TEX_IMAGE_BOX, handle.svm_slot(), compiler.encode_uchar4(vector_offset, compiler.stack_assign_if_linked(color_out), compiler.stack_assign_if_linked(alpha_out), flags), - __float_as_int(projection_blend)); + compiler.encode_uchar4(blend >> 8, + blend & 0xff, + compiler.stack_assign(vector_dx), + compiler.stack_assign(vector_dy))); } tex_mapping.compile_end(compiler, vector_in, vector_offset); @@ -458,12 +471,13 @@ void ImageTextureNode::compile(OSLCompiler &compiler) const bool compress_as_srgb = metadata.compress_as_srgb; const ustring known_colorspace = metadata.colorspace; - if (handle.svm_slot() == -1) { + if (handle.svm_slot(true) == -1) { + filename = compiler.scene->image_manager->get_mip_map_path(filename.string()); compiler.parameter_texture( "filename", filename, compress_as_srgb ? 
u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle.svm_slot(true)); } const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || @@ -473,7 +487,6 @@ void ImageTextureNode::compile(OSLCompiler &compiler) compiler.parameter(this, "projection"); compiler.parameter(this, "projection_blend"); - compiler.parameter("compress_as_srgb", compress_as_srgb); compiler.parameter("ignore_alpha", alpha_type == IMAGE_ALPHA_IGNORE); compiler.parameter("unassociate_alpha", !alpha_out->links.empty() && unassociate_alpha); compiler.parameter("is_float", is_float); @@ -518,6 +531,8 @@ NODE_DEFINE(EnvironmentTextureNode) SOCKET_BOOLEAN(animated, "Animated", false); SOCKET_IN_POINT(vector, "Vector", zero_float3(), SocketType::LINK_POSITION); + SOCKET_IN_POINT(vector_dx, "Vector_dx", zero_float3()); + SOCKET_IN_POINT(vector_dy, "Vector_dy", zero_float3()); SOCKET_OUT_COLOR(color, "Color"); SOCKET_OUT_FLOAT(alpha, "Alpha"); @@ -567,6 +582,8 @@ void EnvironmentTextureNode::compile(SVMCompiler &compiler) ShaderInput *vector_in = input("Vector"); ShaderOutput *color_out = output("Color"); ShaderOutput *alpha_out = output("Alpha"); + ShaderInput *vector_dx = input("Vector_dx"); + ShaderInput *vector_dy = input("Vector_dy"); if (handle.empty()) { ImageManager *image_manager = compiler.scene->image_manager; @@ -609,17 +626,16 @@ void EnvironmentTextureNode::compile(OSLCompiler &compiler) const bool compress_as_srgb = metadata.compress_as_srgb; const ustring known_colorspace = metadata.colorspace; - if (handle.svm_slot() == -1) { + if (handle.svm_slot(true) == -1) { compiler.parameter_texture( "filename", filename, compress_as_srgb ? 
u_colorspace_raw : known_colorspace); } else { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle.svm_slot(true)); } compiler.parameter(this, "projection"); compiler.parameter(this, "interpolation"); - compiler.parameter("compress_as_srgb", compress_as_srgb); compiler.parameter("ignore_alpha", alpha_type == IMAGE_ALPHA_IGNORE); compiler.parameter("is_float", is_float); compiler.add(this, "node_environment_texture"); @@ -969,7 +985,7 @@ void SkyTextureNode::compile(OSLCompiler &compiler) compiler.parameter_array("nishita_data", sunsky.nishita_data, 10); /* nishita texture */ if (sky_type == NODE_SKY_NISHITA) { - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle.svm_slot(true)); } compiler.add(this, "node_sky_texture"); } @@ -1861,7 +1877,7 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler) handle = image_manager->add_image(filename.string(), image_params()); } - compiler.parameter_texture("filename", handle.svm_slot()); + compiler.parameter_texture("filename", handle.svm_slot(true)); if (space == NODE_TEX_VOXEL_SPACE_WORLD) { compiler.parameter("mapping", tfm); compiler.parameter("use_mapping", 1); diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 99cb0b779b8..03e80b3f3a4 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -128,6 +128,8 @@ class ImageTextureNode : public ImageSlotTextureNode { NODE_SOCKET_API(float, projection_blend) NODE_SOCKET_API(bool, animated) NODE_SOCKET_API(float3, vector) + NODE_SOCKET_API(float3, vector_dx) + NODE_SOCKET_API(float3, vector_dy) NODE_SOCKET_API(array<int>, tiles) protected: @@ -164,6 +166,8 @@ class EnvironmentTextureNode : public ImageSlotTextureNode { NODE_SOCKET_API(InterpolationType, interpolation) NODE_SOCKET_API(bool, animated) NODE_SOCKET_API(float3, vector) + NODE_SOCKET_API(float3, vector_dx) + NODE_SOCKET_API(float3, vector_dy) }; 
class SkyTextureNode : public TextureNode { diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 7dc79f48145..84b5cdd4d26 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -46,11 +46,7 @@ CCL_NAMESPACE_BEGIN #ifdef WITH_OSL -/* Shared Texture and Shading System */ - -OSL::TextureSystem *OSLShaderManager::ts_shared = NULL; -int OSLShaderManager::ts_shared_users = 0; -thread_mutex OSLShaderManager::ts_shared_mutex; +/* Shared Shading System */ OSL::ShadingSystem *OSLShaderManager::ss_shared = NULL; OSLRenderServices *OSLShaderManager::services_shared = NULL; @@ -110,7 +106,7 @@ void OSLShaderManager::device_update_specific(Device *device, device_free(device, dscene, scene); /* set texture system */ - scene->image_manager->set_osl_texture_system((void *)ts); + scene->image_manager->set_oiio_texture_system((void *)ts); /* create shaders */ OSLGlobals *og = (OSLGlobals *)device->osl_memory(); @@ -190,41 +186,6 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s og->background_state.reset(); } -void OSLShaderManager::texture_system_init() -{ - /* create texture system, shared between different renders to reduce memory usage */ - thread_scoped_lock lock(ts_shared_mutex); - - if (ts_shared_users == 0) { - ts_shared = TextureSystem::create(true); - - ts_shared->attribute("automip", 1); - ts_shared->attribute("autotile", 64); - ts_shared->attribute("gray_to_rgb", 1); - - /* effectively unlimited for now, until we support proper mipmap lookups */ - ts_shared->attribute("max_memory_MB", 16384); - } - - ts = ts_shared; - ts_shared_users++; -} - -void OSLShaderManager::texture_system_free() -{ - /* shared texture system decrease users and destroy if no longer used */ - thread_scoped_lock lock(ts_shared_mutex); - ts_shared_users--; - - if (ts_shared_users == 0) { - ts_shared->invalidate_all(true); - OSL::TextureSystem::destroy(ts_shared); - ts_shared = NULL; - } - - ts = NULL; -} - void 
OSLShaderManager::shading_system_init() { /* create shading system, shared between different renders to reduce memory usage */ @@ -232,7 +193,7 @@ void OSLShaderManager::shading_system_init() if (ss_shared_users == 0) { /* Must use aligned new due to concurrent hash map. */ - services_shared = util_aligned_new<OSLRenderServices>(ts_shared); + services_shared = util_aligned_new<OSLRenderServices>(ts); string shader_path = path_get("shader"); # ifdef _WIN32 @@ -247,7 +208,7 @@ void OSLShaderManager::shading_system_init() shader_path = string_to_ansi(shader_path); # endif - ss_shared = new OSL::ShadingSystem(services_shared, ts_shared, &errhandler); + ss_shared = new OSL::ShadingSystem(services_shared, ts, &errhandler); ss_shared->attribute("lockgeom", 1); ss_shared->attribute("commonspace", "world"); ss_shared->attribute("searchpath:shader", shader_path); diff --git a/intern/cycles/render/osl.h b/intern/cycles/render/osl.h index dfeec54d915..8a2d5049e9b 100644 --- a/intern/cycles/render/osl.h +++ b/intern/cycles/render/osl.h @@ -103,22 +103,14 @@ class OSLShaderManager : public ShaderManager { const std::string &bytecode = ""); protected: - void texture_system_init(); - void texture_system_free(); - void shading_system_init(); void shading_system_free(); OSL::ShadingSystem *ss; - OSL::TextureSystem *ts; OSLRenderServices *services; OSL::ErrorHandler errhandler; map<string, OSLShaderInfo> loaded_shaders; - static OSL::TextureSystem *ts_shared; - static thread_mutex ts_shared_mutex; - static int ts_shared_users; - static OSL::ShadingSystem *ss_shared; static OSLRenderServices *services_shared; static thread_mutex ss_shared_mutex; diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 7d8a6774381..842a341358a 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -138,6 +138,50 @@ class DeviceScene { DeviceScene(Device *device); }; +/* Texture Cache Params */ +class TextureCacheParams { + public: + TextureCacheParams() + 
: use_cache(false), + cache_size(1024), + tile_size(64), + diffuse_blur(1.0f / 64.f), + glossy_blur(0.0f), + auto_convert(true), + accept_unmipped(true), + accept_untiled(true), + auto_tile(true), + auto_mip(true), + use_custom_cache_path(false) + { + } + + bool modified(const TextureCacheParams &params) + { + return !(use_cache == params.use_cache && cache_size == params.cache_size && + tile_size == params.tile_size && diffuse_blur == params.diffuse_blur && + glossy_blur == params.glossy_blur && auto_convert == params.auto_convert && + accept_unmipped == params.accept_unmipped && + accept_untiled == params.accept_untiled && auto_tile == params.auto_tile && + auto_mip == params.auto_mip && + use_custom_cache_path == params.use_custom_cache_path && + custom_cache_path == params.custom_cache_path); + } + + bool use_cache; + int cache_size; + int tile_size; + float diffuse_blur; + float glossy_blur; + bool auto_convert; + bool accept_unmipped; + bool accept_untiled; + bool auto_tile; + bool auto_mip; + bool use_custom_cache_path; + string custom_cache_path; +}; + /* Scene Parameters */ class SceneParams { @@ -179,6 +223,7 @@ class SceneParams { int hair_subdivisions; CurveShapeType hair_shape; int texture_limit; + TextureCacheParams texture; bool background; @@ -204,7 +249,7 @@ class SceneParams { use_bvh_unaligned_nodes == params.use_bvh_unaligned_nodes && num_bvh_time_steps == params.num_bvh_time_steps && hair_subdivisions == params.hair_subdivisions && hair_shape == params.hair_shape && - texture_limit == params.texture_limit); + texture_limit == params.texture_limit) || texture.modified(params.texture); } int curve_subdivisions() diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 59b60904746..ab471f13f2a 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -33,10 +33,13 @@ #include "render/tables.h" #include "util/util_foreach.h" +#include "util/util_logging.h" #include "util/util_murmurhash.h" 
#include "util/util_task.h" #include "util/util_transform.h" +#include "kernel/kernel_oiio_globals.h" +#include <OpenImageIO/texture.h> #ifdef WITH_OCIO # include <OpenColorIO/OpenColorIO.h> namespace OCIO = OCIO_NAMESPACE; @@ -45,6 +48,7 @@ namespace OCIO = OCIO_NAMESPACE; CCL_NAMESPACE_BEGIN thread_mutex ShaderManager::lookup_table_mutex; + vector<float> ShaderManager::beckmann_table; bool ShaderManager::beckmann_table_ready = false; @@ -491,10 +495,37 @@ void ShaderManager::device_update_common(Device *device, Progress & /*progress*/) { dscene->shaders.free(); - if (scene->shaders.size() == 0) return; + if (device->info.type == DEVICE_CPU && + (scene->params.shadingsystem == SHADINGSYSTEM_OSL || scene->params.texture.use_cache)) { + /* set texture system */ + scene->image_manager->set_oiio_texture_system((void *)ts); + OIIOGlobals *oiio_globals = (OIIOGlobals *)device->oiio_memory(); + if (oiio_globals) { + /* update attributes from scene parms */ + ts->attribute("autotile", + scene->params.texture.auto_tile ? scene->params.texture.tile_size : 0); + ts->attribute("automip", scene->params.texture.auto_mip ? 1 : 0); + ts->attribute("accept_unmipped", scene->params.texture.accept_unmipped ? 1 : 0); + ts->attribute("accept_untiled", scene->params.texture.accept_untiled ? 1 : 0); + ts->attribute("max_memory_MB", + scene->params.texture.cache_size > 0 ? 
+ (float)scene->params.texture.cache_size : + 16384.0f); + ts->attribute("latlong_up", "z"); + ts->attribute("flip_t", 1); + ts->attribute("max_tile_channels", 1); + if (scene->params.texture_limit > 0) { + ts->attribute("max_mip_res", scene->params.texture_limit); + } + oiio_globals->tex_sys = ts; + oiio_globals->diffuse_blur = scene->params.texture.diffuse_blur; + oiio_globals->glossy_blur = scene->params.texture.glossy_blur; + } + } + KernelShader *kshader = dscene->shaders.alloc(scene->shaders.size()); bool has_volumes = false; bool has_transparent_shadow = false; @@ -750,6 +781,22 @@ void ShaderManager::free_memory() ColorSpaceManager::free_memory(); } +void ShaderManager::texture_system_init() +{ + ts = TextureSystem::create(true); + ts->attribute("gray_to_rgb", 1); + ts->attribute("forcefloat", 1); +} + +void ShaderManager::texture_system_free() +{ + VLOG(1) << ts->getstats(2); + ts->reset_stats(); + ts->invalidate_all(true); + TextureSystem::destroy(ts); + ts = NULL; +} + float ShaderManager::linear_rgb_to_gray(float3 c) { return dot(c, rgb_to_y); diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index c65cac351a4..29d14700efd 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -23,11 +23,14 @@ # include <OSL/oslexec.h> #endif +#include <OpenImageIO/texture.h> + #include "kernel/kernel_types.h" #include "render/attribute.h" #include "graph/node.h" +#include "kernel/kernel_oiio_globals.h" #include "util/util_map.h" #include "util/util_param.h" #include "util/util_string.h" @@ -249,6 +252,11 @@ class ShaderManager { thread_spin_lock attribute_lock_; + void texture_system_init(); + void texture_system_free(); + + OIIO::TextureSystem *ts; + float3 xyz_to_r; float3 xyz_to_g; float3 xyz_to_b; diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index dcb3976e15c..91811207a98 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -37,10 +37,12 @@ CCL_NAMESPACE_BEGIN 
SVMShaderManager::SVMShaderManager() { + texture_system_init(); } SVMShaderManager::~SVMShaderManager() { + texture_system_free(); } void SVMShaderManager::reset(Scene * /*scene*/) diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index 71bf9c65911..de15fdb7b5a 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -50,6 +50,7 @@ typedef enum ImageDataType { IMAGE_DATA_TYPE_USHORT = 7, IMAGE_DATA_TYPE_NANOVDB_FLOAT = 8, IMAGE_DATA_TYPE_NANOVDB_FLOAT3 = 9, + IMAGE_DATA_TYPE_OIIO = 10, IMAGE_DATA_NUM_TYPES } ImageDataType; @@ -94,6 +95,7 @@ typedef struct TextureInfo { /* Transform for 3D textures. */ uint use_transform_3d; Transform transform_3d; + uint compress_as_srgb; } TextureInfo; CCL_NAMESPACE_END |