Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt34
-rw-r--r--intern/cycles/kernel/closure/bsdf.h2
-rw-r--r--intern/cycles/kernel/device/cpu/kernel.cpp9
-rw-r--r--intern/cycles/kernel/device/cuda/compat.h5
-rw-r--r--intern/cycles/kernel/device/hip/compat.h1
-rw-r--r--intern/cycles/kernel/device/metal/compat.h1
-rw-r--r--intern/cycles/kernel/device/oneapi/compat.h1
-rw-r--r--intern/cycles/kernel/device/optix/compat.h31
-rw-r--r--intern/cycles/kernel/device/optix/globals.h7
-rw-r--r--intern/cycles/kernel/device/optix/kernel_osl.cu83
-rw-r--r--intern/cycles/kernel/integrator/displacement_shader.h4
-rw-r--r--intern/cycles/kernel/integrator/init_from_bake.h7
-rw-r--r--intern/cycles/kernel/integrator/surface_shader.h9
-rw-r--r--intern/cycles/kernel/integrator/volume_shader.h4
-rw-r--r--intern/cycles/kernel/osl/closures.cpp282
-rw-r--r--intern/cycles/kernel/osl/closures_setup.h23
-rw-r--r--intern/cycles/kernel/osl/closures_template.h4
-rw-r--r--intern/cycles/kernel/osl/osl.h183
-rw-r--r--intern/cycles/kernel/osl/services.cpp62
-rw-r--r--intern/cycles/kernel/osl/services.h10
-rw-r--r--intern/cycles/kernel/osl/services_gpu.h2176
-rw-r--r--intern/cycles/kernel/osl/services_optix.cu17
-rw-r--r--intern/cycles/kernel/osl/shaders/node_geometry.osl5
-rw-r--r--intern/cycles/kernel/osl/shaders/node_normal_map.osl7
-rw-r--r--intern/cycles/kernel/osl/shaders/node_tangent.osl5
-rw-r--r--intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl7
-rw-r--r--intern/cycles/kernel/osl/types.h102
-rw-r--r--intern/cycles/kernel/svm/noise.h268
-rw-r--r--intern/cycles/kernel/types.h17
29 files changed, 2936 insertions, 430 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 3779fdc697a..99f9e536977 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -37,6 +37,14 @@ set(SRC_KERNEL_DEVICE_OPTIX
device/optix/kernel_shader_raytrace.cu
)
+if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
+ set(SRC_KERNEL_DEVICE_OPTIX
+ ${SRC_KERNEL_DEVICE_OPTIX}
+ osl/services_optix.cu
+ device/optix/kernel_osl.cu
+ )
+endif()
+
set(SRC_KERNEL_DEVICE_ONEAPI
device/oneapi/kernel.cpp
)
@@ -181,6 +189,16 @@ set(SRC_KERNEL_SVM_HEADERS
svm/vertex_color.h
)
+if(WITH_CYCLES_OSL)
+ set(SRC_KERNEL_OSL_HEADERS
+ osl/osl.h
+ osl/closures_setup.h
+ osl/closures_template.h
+ osl/services_gpu.h
+ osl/types.h
+ )
+endif()
+
set(SRC_KERNEL_GEOM_HEADERS
geom/geom.h
geom/attribute.h
@@ -306,6 +324,7 @@ set(SRC_KERNEL_HEADERS
${SRC_KERNEL_GEOM_HEADERS}
${SRC_KERNEL_INTEGRATOR_HEADERS}
${SRC_KERNEL_LIGHT_HEADERS}
+ ${SRC_KERNEL_OSL_HEADERS}
${SRC_KERNEL_SAMPLE_HEADERS}
${SRC_KERNEL_SVM_HEADERS}
${SRC_KERNEL_TYPES_HEADERS}
@@ -328,6 +347,7 @@ set(SRC_UTIL_HEADERS
../util/math_int2.h
../util/math_int3.h
../util/math_int4.h
+ ../util/math_int8.h
../util/math_matrix.h
../util/projection.h
../util/rect.h
@@ -350,6 +370,8 @@ set(SRC_UTIL_HEADERS
../util/types_int3_impl.h
../util/types_int4.h
../util/types_int4_impl.h
+ ../util/types_int8.h
+ ../util/types_int8_impl.h
../util/types_spectrum.h
../util/types_uchar2.h
../util/types_uchar2_impl.h
@@ -705,6 +727,16 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
kernel_optix_shader_raytrace
"device/optix/kernel_shader_raytrace.cu"
"--keep-device-functions")
+ if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
+ CYCLES_OPTIX_KERNEL_ADD(
+ kernel_optix_osl
+ "device/optix/kernel_osl.cu"
+ "--relocatable-device-code=true")
+ CYCLES_OPTIX_KERNEL_ADD(
+ kernel_optix_osl_services
+ "osl/services_optix.cu"
+ "--relocatable-device-code=true")
+ endif()
add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
cycles_set_solution_folder(cycles_kernel_optix)
@@ -992,6 +1024,7 @@ source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
source_group("kernel" FILES ${SRC_KERNEL_TYPES_HEADERS})
source_group("light" FILES ${SRC_KERNEL_LIGHT_HEADERS})
+source_group("osl" FILES ${SRC_KERNEL_OSL_HEADERS})
source_group("sample" FILES ${SRC_KERNEL_SAMPLE_HEADERS})
source_group("svm" FILES ${SRC_KERNEL_SVM_HEADERS})
source_group("util" FILES ${SRC_KERNEL_UTIL_HEADERS})
@@ -1028,6 +1061,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLE
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_LIGHT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/light)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_OSL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/osl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SAMPLE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/sample)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 71af68aa80e..2f5c5d7bd0c 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -297,8 +297,10 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
ccl_private float2 *roughness,
ccl_private float *eta)
{
+#ifdef __SVM__
bool refractive = false;
float alpha = 1.0f;
+#endif
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
*roughness = one_float2();
diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp
index 01087c96dd6..558431961ab 100644
--- a/intern/cycles/kernel/device/cpu/kernel.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel.cpp
@@ -7,6 +7,7 @@
* one with SSE2 intrinsics.
*/
#if defined(__x86_64__) || defined(_M_X64)
+# define __KERNEL_SSE__
# define __KERNEL_SSE2__
#endif
@@ -29,11 +30,15 @@
# define __KERNEL_SSE41__
# endif
# ifdef __AVX__
-# define __KERNEL_SSE__
+# ifndef __KERNEL_SSE__
+# define __KERNEL_SSE__
+# endif
# define __KERNEL_AVX__
# endif
# ifdef __AVX2__
-# define __KERNEL_SSE__
+# ifndef __KERNEL_SSE__
+# define __KERNEL_SSE__
+# endif
# define __KERNEL_AVX2__
# endif
#endif
diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h
index 51e1381d552..3a950779c11 100644
--- a/intern/cycles/kernel/device/cuda/compat.h
+++ b/intern/cycles/kernel/device/cuda/compat.h
@@ -30,6 +30,7 @@ typedef unsigned long long uint64_t;
/* Qualifiers */
#define ccl_device __device__ __inline__
+#define ccl_device_extern extern "C" __device__
#if __CUDA_ARCH__ < 500
# define ccl_device_inline __device__ __forceinline__
# define ccl_device_forceinline __device__ __forceinline__
@@ -109,14 +110,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
typedef unsigned short half;
-__device__ half __float2half(const float f)
+ccl_device_forceinline half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}
-__device__ float __half2float(const half h)
+ccl_device_forceinline float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h
index 648988c31b6..8755395c82c 100644
--- a/intern/cycles/kernel/device/hip/compat.h
+++ b/intern/cycles/kernel/device/hip/compat.h
@@ -28,6 +28,7 @@ typedef unsigned long long uint64_t;
/* Qualifiers */
#define ccl_device __device__ __inline__
+#define ccl_device_extern extern "C" __device__
#define ccl_device_inline __device__ __inline__
#define ccl_device_forceinline __device__ __forceinline__
#define ccl_device_noinline __device__ __noinline__
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index f689e93e5a2..2dd6cc98b59 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -38,6 +38,7 @@ using namespace metal::raytracing;
# define ccl_device_noinline ccl_device __attribute__((noinline))
#endif
+#define ccl_device_extern extern "C"
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index dfaec65130c..b83512180d7 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -28,6 +28,7 @@
/* Qualifier wrappers for different names on different devices */
#define ccl_device
+#define ccl_device_extern extern "C"
#define ccl_global
#define ccl_always_inline __attribute__((always_inline))
#define ccl_device_inline inline
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index 1a11a533b7e..e13101f57b8 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -33,14 +33,16 @@ typedef unsigned long long uint64_t;
#endif
#define ccl_device \
- __device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+ static __device__ \
+ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
+#define ccl_device_extern extern "C" __device__
#define ccl_device_inline ccl_device
#define ccl_device_forceinline ccl_device
-#define ccl_device_inline_method ccl_device
-#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_inline_method __device__ __forceinline__
+#define ccl_device_noinline static __device__ __noinline__
#define ccl_device_noinline_cpu ccl_device
#define ccl_global
-#define ccl_inline_constant __constant__
+#define ccl_inline_constant static __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_constant const
#define ccl_gpu_shared __shared__
@@ -57,23 +59,6 @@ typedef unsigned long long uint64_t;
#define kernel_assert(cond)
-/* GPU thread, block, grid size and index */
-
-#define ccl_gpu_thread_idx_x (threadIdx.x)
-#define ccl_gpu_block_dim_x (blockDim.x)
-#define ccl_gpu_block_idx_x (blockIdx.x)
-#define ccl_gpu_grid_dim_x (gridDim.x)
-#define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
-#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
-
-/* GPU warp synchronization. */
-
-#define ccl_gpu_syncthreads() __syncthreads()
-#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
-
/* GPU texture objects */
typedef unsigned long long CUtexObject;
@@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
typedef unsigned short half;
-__device__ half __float2half(const float f)
+ccl_device_forceinline half __float2half(const float f)
{
half val;
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
return val;
}
-__device__ float __half2float(const half h)
+ccl_device_forceinline float __half2float(const half h)
{
float val;
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h
index 7af2e421378..126df74bc8c 100644
--- a/intern/cycles/kernel/device/optix/globals.h
+++ b/intern/cycles/kernel/device/optix/globals.h
@@ -25,6 +25,7 @@ struct KernelParamsOptiX {
/* Kernel arguments */
const int *path_index_array;
float *render_buffer;
+ int offset;
/* Global scene data and textures */
KernelData data;
@@ -36,7 +37,11 @@ struct KernelParamsOptiX {
};
#ifdef __NVCC__
-extern "C" static __constant__ KernelParamsOptiX kernel_params;
+extern "C"
+# ifndef __CUDACC_RDC__
+ static
+# endif
+ __constant__ KernelParamsOptiX kernel_params;
#endif
/* Abstraction macros */
diff --git a/intern/cycles/kernel/device/optix/kernel_osl.cu b/intern/cycles/kernel/device/optix/kernel_osl.cu
new file mode 100644
index 00000000000..0f3f477935b
--- /dev/null
+++ b/intern/cycles/kernel/device/optix/kernel_osl.cu
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#define WITH_OSL
+
+/* Copy of the regular OptiX kernels with additional OSL support. */
+
+#include "kernel/device/optix/kernel_shader_raytrace.cu"
+
+#include "kernel/bake/bake.h"
+#include "kernel/integrator/shade_background.h"
+#include "kernel/integrator/shade_light.h"
+#include "kernel/integrator/shade_shadow.h"
+#include "kernel/integrator/shade_volume.h"
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_background()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_background(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_light()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_light(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_surface(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_volume()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_volume(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_shadow()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ?
+ kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_shadow(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_shader_eval_displace()
+{
+ KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
+ float *const output = kernel_params.render_buffer;
+ const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
+ kernel_displace_evaluate(nullptr, input, output, global_index);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_shader_eval_background()
+{
+ KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
+ float *const output = kernel_params.render_buffer;
+ const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
+ kernel_background_evaluate(nullptr, input, output, global_index);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_transparency()
+{
+ KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
+ float *const output = kernel_params.render_buffer;
+ const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
+ kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index);
+}
diff --git a/intern/cycles/kernel/integrator/displacement_shader.h b/intern/cycles/kernel/integrator/displacement_shader.h
index 839dfe244ac..a6e9d674396 100644
--- a/intern/cycles/kernel/integrator/displacement_shader.h
+++ b/intern/cycles/kernel/integrator/displacement_shader.h
@@ -24,8 +24,8 @@ ccl_device void displacement_shader_eval(KernelGlobals kg,
/* this will modify sd->P */
#ifdef __OSL__
- if (kg->osl) {
- OSLShader::eval_displacement(kg, state, sd);
+ if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
+ osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(kg, state, sd, 0);
}
else
#endif
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h
index 667ba949760..cc3fbe3fe39 100644
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -156,6 +156,13 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
u = v;
v = 1.0f - tmp - v;
+ const float tmpdx = dudx;
+ const float tmpdy = dudy;
+ dudx = dvdx;
+ dudy = dvdy;
+ dvdx = -tmpdx - dvdx;
+ dvdy = -tmpdy - dvdy;
+
/* Position and normal on triangle. */
const int object = kernel_data.bake.object_index;
float3 P, Ng;
diff --git a/intern/cycles/kernel/integrator/surface_shader.h b/intern/cycles/kernel/integrator/surface_shader.h
index 6c0097b11bd..5e47a34f77e 100644
--- a/intern/cycles/kernel/integrator/surface_shader.h
+++ b/intern/cycles/kernel/integrator/surface_shader.h
@@ -827,13 +827,8 @@ ccl_device void surface_shader_eval(KernelGlobals kg,
sd->num_closure_left = max_closures;
#ifdef __OSL__
- if (kg->osl) {
- if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
- OSLShader::eval_background(kg, state, sd, path_flag);
- }
- else {
- OSLShader::eval_surface(kg, state, sd, path_flag);
- }
+ if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
+ osl_eval_nodes<SHADER_TYPE_SURFACE>(kg, state, sd, path_flag);
}
else
#endif
diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h
index 0ff968723a1..f9050647c6d 100644
--- a/intern/cycles/kernel/integrator/volume_shader.h
+++ b/intern/cycles/kernel/integrator/volume_shader.h
@@ -493,8 +493,8 @@ ccl_device_inline void volume_shader_eval(KernelGlobals kg,
/* evaluate shader */
# ifdef __OSL__
- if (kg->osl) {
- OSLShader::eval_volume(kg, state, sd, path_flag);
+ if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
+ osl_eval_nodes<SHADER_TYPE_VOLUME>(kg, state, sd, path_flag);
}
else
# endif
diff --git a/intern/cycles/kernel/osl/closures.cpp b/intern/cycles/kernel/osl/closures.cpp
index d56e0551a91..6800c765345 100644
--- a/intern/cycles/kernel/osl/closures.cpp
+++ b/intern/cycles/kernel/osl/closures.cpp
@@ -25,13 +25,18 @@
#include "kernel/osl/osl.h"
-#include "kernel/osl/closures_setup.h"
-
#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2])
CCL_NAMESPACE_BEGIN
+static_assert(sizeof(OSLClosure) == sizeof(OSL::ClosureColor) &&
+ sizeof(OSLClosureAdd) == sizeof(OSL::ClosureAdd) &&
+ sizeof(OSLClosureMul) == sizeof(OSL::ClosureMul) &&
+ sizeof(OSLClosureComponent) == sizeof(OSL::ClosureComponent));
+static_assert(sizeof(ShaderGlobals) == sizeof(OSL::ShaderGlobals) &&
+ offsetof(ShaderGlobals, Ci) == offsetof(OSL::ShaderGlobals, Ci));
+
/* Registration */
#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
@@ -60,53 +65,18 @@ void OSLRenderServices::register_closures(OSL::ShadingSystem *ss)
#include "closures_template.h"
}
-/* Globals */
+/* Surface & Background */
-static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
- ShaderData *sd,
- const void *state,
- uint32_t path_flag,
- OSLThreadData *tdata)
+template<>
+void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag)
{
- OSL::ShaderGlobals *globals = &tdata->globals;
-
- const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
- const differential3 dI = differential_from_compact(sd->I, sd->dI);
-
- /* copy from shader data to shader globals */
- globals->P = TO_VEC3(sd->P);
- globals->dPdx = TO_VEC3(dP.dx);
- globals->dPdy = TO_VEC3(dP.dy);
- globals->I = TO_VEC3(sd->I);
- globals->dIdx = TO_VEC3(dI.dx);
- globals->dIdy = TO_VEC3(dI.dy);
- globals->N = TO_VEC3(sd->N);
- globals->Ng = TO_VEC3(sd->Ng);
- globals->u = sd->u;
- globals->dudx = sd->du.dx;
- globals->dudy = sd->du.dy;
- globals->v = sd->v;
- globals->dvdx = sd->dv.dx;
- globals->dvdy = sd->dv.dy;
- globals->dPdu = TO_VEC3(sd->dPdu);
- globals->dPdv = TO_VEC3(sd->dPdv);
- globals->surfacearea = 1.0f;
- globals->time = sd->time;
-
- /* booleans */
- globals->raytype = path_flag;
- globals->flipHandedness = 0;
- globals->backfacing = (sd->flag & SD_BACKFACING);
-
- /* shader data to be used in services callbacks */
- globals->renderstate = sd;
-
- /* hacky, we leave it to services to fetch actual object matrix */
- globals->shader2common = sd;
- globals->object2common = sd;
-
- /* must be set to NULL before execute */
- globals->Ci = NULL;
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(
+ kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
/* clear trace data */
tdata->tracedata.init = false;
@@ -121,53 +91,6 @@ static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
sd->osl_path_state = (const IntegratorStateCPU *)state;
sd->osl_shadow_path_state = nullptr;
}
-}
-
-static void flatten_closure_tree(const KernelGlobalsCPU *kg,
- ShaderData *sd,
- uint32_t path_flag,
- const OSL::ClosureColor *closure,
- float3 weight = make_float3(1.0f, 1.0f, 1.0f))
-{
- /* OSL gives us a closure tree, we flatten it into arrays per
- * closure type, for evaluation, sampling, etc later on. */
-
- switch (closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_closure_tree(kg, sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
- flatten_closure_tree(kg, sd, path_flag, add->closureA, weight);
- flatten_closure_tree(kg, sd, path_flag, add->closureB, weight);
- break;
- }
-#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
- case OSL_CLOSURE_##Upper##_ID: { \
- const OSL::ClosureComponent *comp = reinterpret_cast<const OSL::ClosureComponent *>(closure); \
- weight *= TO_FLOAT3(comp->w); \
- osl_closure_##lower##_setup( \
- kg, sd, path_flag, weight, reinterpret_cast<const Upper##Closure *>(comp + 1)); \
- break; \
- }
-#include "closures_template.h"
- default:
- break;
- }
-}
-
-/* Surface */
-
-void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag)
-{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
/* execute shader for this point */
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
@@ -175,101 +98,99 @@ void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
OSL::ShadingContext *octx = tdata->context;
int shader = sd->shader & SHADER_MASK;
- /* automatic bump shader */
- if (kg->osl->bump_state[shader]) {
- /* save state */
- const float3 P = sd->P;
- const float dP = sd->dP;
- const OSL::Vec3 dPdx = globals->dPdx;
- const OSL::Vec3 dPdy = globals->dPdy;
-
- /* set state as if undisplaced */
- if (sd->flag & SD_HAS_DISPLACEMENT) {
- float data[9];
- bool found = kg->osl->services->get_attribute(sd,
- true,
- OSLRenderServices::u_empty,
- TypeDesc::TypeVector,
- OSLRenderServices::u_geom_undisplaced,
- data);
- (void)found;
- assert(found);
-
- differential3 tmp_dP;
- memcpy(&sd->P, data, sizeof(float) * 3);
- memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3);
- memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3);
-
- object_position_transform(kg, sd, &sd->P);
- object_dir_transform(kg, sd, &tmp_dP.dx);
- object_dir_transform(kg, sd, &tmp_dP.dy);
-
- sd->dP = differential_make_compact(tmp_dP);
-
- globals->P = TO_VEC3(sd->P);
- globals->dPdx = TO_VEC3(tmp_dP.dx);
- globals->dPdy = TO_VEC3(tmp_dP.dy);
+ if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
+ /* background */
+ if (kg->osl->background_state) {
+ ss->execute(octx, *(kg->osl->background_state), *globals);
}
-
- /* execute bump shader */
- ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
-
- /* reset state */
- sd->P = P;
- sd->dP = dP;
-
- globals->P = TO_VEC3(P);
- globals->dPdx = TO_VEC3(dPdx);
- globals->dPdy = TO_VEC3(dPdy);
}
+ else {
+ /* automatic bump shader */
+ if (kg->osl->bump_state[shader]) {
+ /* save state */
+ const float3 P = sd->P;
+ const float dP = sd->dP;
+ const OSL::Vec3 dPdx = globals->dPdx;
+ const OSL::Vec3 dPdy = globals->dPdy;
+
+ /* set state as if undisplaced */
+ if (sd->flag & SD_HAS_DISPLACEMENT) {
+ float data[9];
+ bool found = kg->osl->services->get_attribute(sd,
+ true,
+ OSLRenderServices::u_empty,
+ TypeDesc::TypeVector,
+ OSLRenderServices::u_geom_undisplaced,
+ data);
+ (void)found;
+ assert(found);
+
+ differential3 tmp_dP;
+ memcpy(&sd->P, data, sizeof(float) * 3);
+ memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3);
+ memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3);
+
+ object_position_transform(kg, sd, &sd->P);
+ object_dir_transform(kg, sd, &tmp_dP.dx);
+ object_dir_transform(kg, sd, &tmp_dP.dy);
+
+ sd->dP = differential_make_compact(tmp_dP);
+
+ globals->P = TO_VEC3(sd->P);
+ globals->dPdx = TO_VEC3(tmp_dP.dx);
+ globals->dPdy = TO_VEC3(tmp_dP.dy);
+ }
+
+ /* execute bump shader */
+ ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
+
+ /* reset state */
+ sd->P = P;
+ sd->dP = dP;
+
+ globals->P = TO_VEC3(P);
+ globals->dPdx = TO_VEC3(dPdx);
+ globals->dPdy = TO_VEC3(dPdy);
+ }
- /* surface shader */
- if (kg->osl->surface_state[shader]) {
- ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
+ /* surface shader */
+ if (kg->osl->surface_state[shader]) {
+ ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
+ }
}
/* flatten closure tree */
if (globals->Ci) {
- flatten_closure_tree(kg, sd, path_flag, globals->Ci);
+ flatten_closure_tree(kg, sd, path_flag, reinterpret_cast<OSLClosure *>(globals->Ci));
}
}
-/* Background */
+/* Volume */
-void OSLShader::eval_background(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag)
+template<>
+void osl_eval_nodes<SHADER_TYPE_VOLUME>(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag)
{
/* setup shader globals from shader data */
OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+ shaderdata_to_shaderglobals(
+ kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
- /* execute shader for this point */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
+ /* clear trace data */
+ tdata->tracedata.init = false;
- if (kg->osl->background_state) {
- ss->execute(octx, *(kg->osl->background_state), *globals);
+ /* Used by render-services. */
+ sd->osl_globals = kg;
+ if (path_flag & PATH_RAY_SHADOW) {
+ sd->osl_path_state = nullptr;
+ sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state;
}
-
- /* return background color immediately */
- if (globals->Ci) {
- flatten_closure_tree(kg, sd, path_flag, globals->Ci);
+ else {
+ sd->osl_path_state = (const IntegratorStateCPU *)state;
+ sd->osl_shadow_path_state = nullptr;
}
-}
-
-/* Volume */
-
-void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag)
-{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
/* execute shader */
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
@@ -283,17 +204,30 @@ void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
/* flatten closure tree */
if (globals->Ci) {
- flatten_closure_tree(kg, sd, path_flag, globals->Ci);
+ flatten_closure_tree(kg, sd, path_flag, reinterpret_cast<OSLClosure *>(globals->Ci));
}
}
/* Displacement */
-void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd)
+template<>
+void osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag)
{
/* setup shader globals from shader data */
OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
+ shaderdata_to_shaderglobals(
+ kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
+
+ /* clear trace data */
+ tdata->tracedata.init = false;
+
+ /* Used by render-services. */
+ sd->osl_globals = kg;
+ sd->osl_path_state = (const IntegratorStateCPU *)state;
+ sd->osl_shadow_path_state = nullptr;
/* execute shader */
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
diff --git a/intern/cycles/kernel/osl/closures_setup.h b/intern/cycles/kernel/osl/closures_setup.h
index 96c551b9951..ceaf56ccba6 100644
--- a/intern/cycles/kernel/osl/closures_setup.h
+++ b/intern/cycles/kernel/osl/closures_setup.h
@@ -40,12 +40,7 @@ CCL_NAMESPACE_BEGIN
const char *label;
#define OSL_CLOSURE_STRUCT_END(Upper, lower) \
} \
- ; \
- ccl_device void osl_closure_##lower##_setup(KernelGlobals kg, \
- ccl_private ShaderData *sd, \
- uint32_t path_flag, \
- float3 weight, \
- ccl_private Upper##Closure *closure);
+ ;
#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) type name;
#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) type name[size];
@@ -210,11 +205,9 @@ ccl_device void osl_closure_microfacet_setup(KernelGlobals kg,
bsdf->ior = closure->ior;
bsdf->T = closure->T;
- static OSL::ustring u_ggx("ggx");
- static OSL::ustring u_default("default");
-
/* GGX */
- if (closure->distribution == u_ggx || closure->distribution == u_default) {
+ if (closure->distribution == make_string("ggx", 11253504724482777663ull) ||
+ closure->distribution == make_string("default", 4430693559278735917ull)) {
if (!closure->refract) {
if (closure->alpha_x == closure->alpha_y) {
/* Isotropic */
@@ -1000,18 +993,14 @@ ccl_device void osl_closure_bssrdf_setup(KernelGlobals kg,
float3 weight,
ccl_private const BSSRDFClosure *closure)
{
- static ustring u_burley("burley");
- static ustring u_random_walk_fixed_radius("random_walk_fixed_radius");
- static ustring u_random_walk("random_walk");
-
ClosureType type;
- if (closure->method == u_burley) {
+ if (closure->method == make_string("burley", 186330084368958868ull)) {
type = CLOSURE_BSSRDF_BURLEY_ID;
}
- else if (closure->method == u_random_walk_fixed_radius) {
+ else if (closure->method == make_string("random_walk_fixed_radius", 5695810351010063150ull)) {
type = CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID;
}
- else if (closure->method == u_random_walk) {
+ else if (closure->method == make_string("random_walk", 11360609267673527222ull)) {
type = CLOSURE_BSSRDF_RANDOM_WALK_ID;
}
else {
diff --git a/intern/cycles/kernel/osl/closures_template.h b/intern/cycles/kernel/osl/closures_template.h
index c808b275966..b9e9b52dcf8 100644
--- a/intern/cycles/kernel/osl/closures_template.h
+++ b/intern/cycles/kernel/osl/closures_template.h
@@ -40,7 +40,7 @@ OSL_CLOSURE_STRUCT_BEGIN(Transparent, transparent)
OSL_CLOSURE_STRUCT_END(Transparent, transparent)
OSL_CLOSURE_STRUCT_BEGIN(Microfacet, microfacet)
- OSL_CLOSURE_STRUCT_MEMBER(Microfacet, STRING, ustring, distribution, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, STRING, DeviceString, distribution, NULL)
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, N, NULL)
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, T, NULL)
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_x, NULL)
@@ -210,7 +210,7 @@ OSL_CLOSURE_STRUCT_BEGIN(PhongRamp, phong_ramp)
OSL_CLOSURE_STRUCT_END(PhongRamp, phong_ramp)
OSL_CLOSURE_STRUCT_BEGIN(BSSRDF, bssrdf)
- OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, STRING, ustring, method, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, STRING, DeviceString, method, NULL)
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, N, NULL)
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, radius, NULL)
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, albedo, NULL)
diff --git a/intern/cycles/kernel/osl/osl.h b/intern/cycles/kernel/osl/osl.h
index bef23f3eea1..cc5c81ad027 100644
--- a/intern/cycles/kernel/osl/osl.h
+++ b/intern/cycles/kernel/osl/osl.h
@@ -1,38 +1,171 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Adapted from Open Shading Language
+ * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
+ * All Rights Reserved.
+ *
+ * Modifications Copyright 2011-2022 Blender Foundation. */
#pragma once
/* OSL Shader Engine
*
- * Holds all variables to execute and use OSL shaders from the kernel. These
- * are initialized externally by OSLShaderManager before rendering starts.
- *
- * Before/after a thread starts rendering, thread_init/thread_free must be
- * called, which will store any per thread OSL state in thread local storage.
- * This means no thread state must be passed along in the kernel itself.
+ * Holds all variables to execute and use OSL shaders from the kernel.
*/
#include "kernel/osl/types.h"
+#include "kernel/osl/closures_setup.h"
+
CCL_NAMESPACE_BEGIN
-class OSLShader {
- public:
- /* eval */
- static void eval_surface(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag);
- static void eval_background(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag);
- static void eval_volume(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag);
- static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd);
-};
+ccl_device_inline void shaderdata_to_shaderglobals(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ ccl_private ShaderGlobals *globals)
+{
+ const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
+ const differential3 dI = differential_from_compact(sd->I, sd->dI);
+
+ /* copy from shader data to shader globals */
+ globals->P = sd->P;
+ globals->dPdx = dP.dx;
+ globals->dPdy = dP.dy;
+ globals->I = sd->I;
+ globals->dIdx = dI.dx;
+ globals->dIdy = dI.dy;
+ globals->N = sd->N;
+ globals->Ng = sd->Ng;
+ globals->u = sd->u;
+ globals->dudx = sd->du.dx;
+ globals->dudy = sd->du.dy;
+ globals->v = sd->v;
+ globals->dvdx = sd->dv.dx;
+ globals->dvdy = sd->dv.dy;
+ globals->dPdu = sd->dPdu;
+ globals->dPdv = sd->dPdv;
+ globals->time = sd->time;
+ globals->dtime = 1.0f;
+ globals->surfacearea = 1.0f;
+ globals->raytype = path_flag;
+ globals->flipHandedness = 0;
+ globals->backfacing = (sd->flag & SD_BACKFACING);
+
+ /* shader data to be used in services callbacks */
+ globals->renderstate = sd;
+
+ /* hacky, we leave it to services to fetch actual object matrix */
+ globals->shader2common = sd;
+ globals->object2common = sd;
+
+ /* must be set to NULL before execute */
+ globals->Ci = nullptr;
+}
+
+ccl_device void flatten_closure_tree(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ ccl_private const OSLClosure *closure)
+{
+ int stack_size = 0;
+ float3 weight = one_float3();
+ float3 weight_stack[16];
+ ccl_private const OSLClosure *closure_stack[16];
+
+ while (closure) {
+ switch (closure->id) {
+ case OSL_CLOSURE_MUL_ID: {
+ ccl_private const OSLClosureMul *mul = static_cast<ccl_private const OSLClosureMul *>(
+ closure);
+ weight *= mul->weight;
+ closure = mul->closure;
+ continue;
+ }
+ case OSL_CLOSURE_ADD_ID: {
+ if (stack_size >= 16) {
+ kernel_assert(!"Exhausted OSL closure stack");
+ break;
+ }
+ ccl_private const OSLClosureAdd *add = static_cast<ccl_private const OSLClosureAdd *>(
+ closure);
+ closure = add->closureA;
+ weight_stack[stack_size] = weight;
+ closure_stack[stack_size++] = add->closureB;
+ continue;
+ }
+#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
+ case OSL_CLOSURE_##Upper##_ID: { \
+ ccl_private const OSLClosureComponent *comp = \
+ static_cast<ccl_private const OSLClosureComponent *>(closure); \
+ osl_closure_##lower##_setup(kg, \
+ sd, \
+ path_flag, \
+ weight * comp->weight, \
+ reinterpret_cast<ccl_private const Upper##Closure *>(comp + 1)); \
+ break; \
+ }
+#include "closures_template.h"
+ default:
+ break;
+ }
+
+ if (stack_size > 0) {
+ weight = weight_stack[--stack_size];
+ closure = closure_stack[stack_size];
+ }
+ else {
+ closure = nullptr;
+ }
+ }
+}
+
+#ifndef __KERNEL_GPU__
+
+template<ShaderType type>
+void osl_eval_nodes(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag);
+
+#else
+
+template<ShaderType type, typename ConstIntegratorGenericState>
+ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag)
+{
+ ShaderGlobals globals;
+ shaderdata_to_shaderglobals(kg, sd, path_flag, &globals);
+
+ const int shader = sd->shader & SHADER_MASK;
+
+# ifdef __KERNEL_OPTIX__
+ uint8_t group_data[2048];
+ uint8_t closure_pool[1024];
+ sd->osl_closure_pool = closure_pool;
+
+ unsigned int optix_dc_index = 2 /* NUM_CALLABLE_PROGRAM_GROUPS */ +
+ (shader + type * kernel_data.max_shaders) * 2;
+ optixDirectCall<void>(optix_dc_index + 0,
+ /* shaderglobals_ptr = */ &globals,
+ /* groupdata_ptr = */ (void *)group_data,
+ /* userdata_base_ptr = */ (void *)nullptr,
+ /* output_base_ptr = */ (void *)nullptr,
+ /* shadeindex = */ 0);
+ optixDirectCall<void>(optix_dc_index + 1,
+ /* shaderglobals_ptr = */ &globals,
+ /* groupdata_ptr = */ (void *)group_data,
+ /* userdata_base_ptr = */ (void *)nullptr,
+ /* output_base_ptr = */ (void *)nullptr,
+ /* shadeindex = */ 0);
+# endif
+
+ if (globals.Ci) {
+ flatten_closure_tree(kg, sd, path_flag, globals.Ci);
+ }
+}
+
+#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp
index b744422ee78..3fd098de4bb 100644
--- a/intern/cycles/kernel/osl/services.cpp
+++ b/intern/cycles/kernel/osl/services.cpp
@@ -119,8 +119,8 @@ ustring OSLRenderServices::u_u("u");
ustring OSLRenderServices::u_v("v");
ustring OSLRenderServices::u_empty;
-OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system)
- : OSL::RendererServices(texture_system)
+OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system, int device_type)
+ : OSL::RendererServices(texture_system), device_type_(device_type)
{
}
@@ -131,6 +131,17 @@ OSLRenderServices::~OSLRenderServices()
}
}
+int OSLRenderServices::supports(string_view feature) const
+{
+#ifdef WITH_OPTIX
+ if (feature == "OptiX") {
+ return device_type_ == DEVICE_OPTIX;
+ }
+#endif
+
+ return false;
+}
+
bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
OSL::Matrix44 &result,
OSL::TransformationPtr xform,
@@ -1139,29 +1150,40 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file
{
OSLTextureHandleMap::iterator it = textures.find(filename);
- /* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */
- if (it != textures.end()) {
- if (it->second->type != OSLTextureHandle::OIIO) {
- return (TextureSystem::TextureHandle *)it->second.get();
+ if (device_type_ == DEVICE_CPU) {
+ /* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */
+ if (it != textures.end()) {
+ if (it->second->type != OSLTextureHandle::OIIO) {
+ return (TextureSystem::TextureHandle *)it->second.get();
+ }
}
- }
- /* Get handle from OpenImageIO. */
- OSL::TextureSystem *ts = m_texturesys;
- TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename);
- if (handle == NULL) {
- return NULL;
- }
+ /* Get handle from OpenImageIO. */
+ OSL::TextureSystem *ts = m_texturesys;
+ TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename);
+ if (handle == NULL) {
+ return NULL;
+ }
+
+ /* Insert new OSLTextureHandle if needed. */
+ if (it == textures.end()) {
+ textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO));
+ it = textures.find(filename);
+ }
- /* Insert new OSLTextureHandle if needed. */
- if (it == textures.end()) {
- textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO));
- it = textures.find(filename);
+ /* Assign OIIO texture handle and return. */
+ it->second->oiio_handle = handle;
+ return (TextureSystem::TextureHandle *)it->second.get();
}
+ else {
+ if (it != textures.end() && it->second->type == OSLTextureHandle::SVM &&
+ it->second->svm_slots[0].w == -1) {
+ return reinterpret_cast<TextureSystem::TextureHandle *>(
+ static_cast<uintptr_t>(it->second->svm_slots[0].y + 1));
+ }
- /* Assign OIIO texture handle and return. */
- it->second->oiio_handle = handle;
- return (TextureSystem::TextureHandle *)it->second.get();
+ return NULL;
+ }
}
bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h
index 334b6682e34..9d875ae8e94 100644
--- a/intern/cycles/kernel/osl/services.h
+++ b/intern/cycles/kernel/osl/services.h
@@ -22,11 +22,8 @@ class PtexCache;
CCL_NAMESPACE_BEGIN
-class Object;
class Scene;
-class Shader;
struct ShaderData;
-struct float3;
struct KernelGlobalsCPU;
/* OSL Texture Handle
@@ -73,11 +70,13 @@ typedef OIIO::unordered_map_concurrent<ustring, OSLTextureHandleRef, ustringHash
class OSLRenderServices : public OSL::RendererServices {
public:
- OSLRenderServices(OSL::TextureSystem *texture_system);
+ OSLRenderServices(OSL::TextureSystem *texture_system, int device_type);
~OSLRenderServices();
static void register_closures(OSL::ShadingSystem *ss);
+ int supports(string_view feature) const override;
+
bool get_matrix(OSL::ShaderGlobals *sg,
OSL::Matrix44 &result,
OSL::TransformationPtr xform,
@@ -324,6 +323,9 @@ class OSLRenderServices : public OSL::RendererServices {
* and is required because texture handles are cached as part of the shared
* shading system. */
OSLTextureHandleMap textures;
+
+ private:
+ int device_type_;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h
new file mode 100644
index 00000000000..75cf39919a0
--- /dev/null
+++ b/intern/cycles/kernel/osl/services_gpu.h
@@ -0,0 +1,2176 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Adapted from Open Shading Language
+ * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
+ * All Rights Reserved.
+ *
+ * Modifications Copyright 2011-2022 Blender Foundation. */
+
+#include "kernel/tables.h"
+#include "kernel/util/differential.h"
+
+#include "kernel/osl/osl.h"
+
+namespace DeviceStrings {
+
+/* "" */
+ccl_device_constant DeviceString _emptystring_ = {0ull};
+/* "common" */
+ccl_device_constant DeviceString u_common = {14645198576927606093ull};
+/* "world" */
+ccl_device_constant DeviceString u_world = {16436542438370751598ull};
+/* "shader" */
+ccl_device_constant DeviceString u_shader = {4279676006089868ull};
+/* "object" */
+ccl_device_constant DeviceString u_object = {973692718279674627ull};
+/* "NDC" */
+ccl_device_constant DeviceString u_ndc = {5148305047403260775ull};
+/* "screen" */
+ccl_device_constant DeviceString u_screen = {14159088609039777114ull};
+/* "camera" */
+ccl_device_constant DeviceString u_camera = {2159505832145726196ull};
+/* "raster" */
+ccl_device_constant DeviceString u_raster = {7759263238610201778ull};
+/* "hsv" */
+ccl_device_constant DeviceString u_hsv = {2177035556331879497ull};
+/* "hsl" */
+ccl_device_constant DeviceString u_hsl = {7749766809258288148ull};
+/* "XYZ" */
+ccl_device_constant DeviceString u_xyz = {4957977063494975483ull};
+/* "xyY" */
+ccl_device_constant DeviceString u_xyy = {5138822319725660255ull};
+/* "sRGB" */
+ccl_device_constant DeviceString u_srgb = {15368599878474175032ull};
+/* "object:location" */
+ccl_device_constant DeviceString u_object_location = {7846190347358762897ull};
+/* "object:color" */
+ccl_device_constant DeviceString u_object_color = {12695623857059169556ull};
+/* "object:alpha" */
+ccl_device_constant DeviceString u_object_alpha = {11165053919428293151ull};
+/* "object:index" */
+ccl_device_constant DeviceString u_object_index = {6588325838217472556ull};
+/* "geom:dupli_generated" */
+ccl_device_constant DeviceString u_geom_dupli_generated = {6715607178003388908ull};
+/* "geom:dupli_uv" */
+ccl_device_constant DeviceString u_geom_dupli_uv = {1294253317490155849ull};
+/* "material:index" */
+ccl_device_constant DeviceString u_material_index = {741770758159634623ull};
+/* "object:random" */
+ccl_device_constant DeviceString u_object_random = {15789063994977955884ull};
+/* "particle:index" */
+ccl_device_constant DeviceString u_particle_index = {9489711748229903784ull};
+/* "particle:random" */
+ccl_device_constant DeviceString u_particle_random = {17993722202766855761ull};
+/* "particle:age" */
+ccl_device_constant DeviceString u_particle_age = {7380730644710951109ull};
+/* "particle:lifetime" */
+ccl_device_constant DeviceString u_particle_lifetime = {16576828923156200061ull};
+/* "particle:location" */
+ccl_device_constant DeviceString u_particle_location = {10309536211423573010ull};
+/* "particle:rotation" */
+ccl_device_constant DeviceString u_particle_rotation = {17858543768041168459ull};
+/* "particle:size" */
+ccl_device_constant DeviceString u_particle_size = {16461524249715420389ull};
+/* "particle:velocity" */
+ccl_device_constant DeviceString u_particle_velocity = {13199101248768308863ull};
+/* "particle:angular_velocity" */
+ccl_device_constant DeviceString u_particle_angular_velocity = {16327930120486517910ull};
+/* "geom:numpolyvertices" */
+ccl_device_constant DeviceString u_geom_numpolyvertices = {382043551489988826ull};
+/* "geom:trianglevertices" */
+ccl_device_constant DeviceString u_geom_trianglevertices = {17839267571524187074ull};
+/* "geom:polyvertices" */
+ccl_device_constant DeviceString u_geom_polyvertices = {1345577201967881769ull};
+/* "geom:name" */
+ccl_device_constant DeviceString u_geom_name = {13606338128269760050ull};
+/* "geom:undisplaced" */
+ccl_device_constant DeviceString u_geom_undisplaced = {12431586303019276305ull};
+/* "geom:is_smooth" */
+ccl_device_constant DeviceString u_is_smooth = {857544214094480123ull};
+/* "geom:is_curve" */
+ccl_device_constant DeviceString u_is_curve = {129742495633653138ull};
+/* "geom:curve_thickness" */
+ccl_device_constant DeviceString u_curve_thickness = {10605802038397633852ull};
+/* "geom:curve_length" */
+ccl_device_constant DeviceString u_curve_length = {11423459517663715453ull};
+/* "geom:curve_tangent_normal" */
+ccl_device_constant DeviceString u_curve_tangent_normal = {12301397394034985633ull};
+/* "geom:curve_random" */
+ccl_device_constant DeviceString u_curve_random = {15293085049960492358ull};
+/* "geom:is_point" */
+ccl_device_constant DeviceString u_is_point = {2511357849436175953ull};
+/* "geom:point_radius" */
+ccl_device_constant DeviceString u_point_radius = {9956381140398668479ull};
+/* "geom:point_position" */
+ccl_device_constant DeviceString u_point_position = {15684484280742966916ull};
+/* "geom:point_random" */
+ccl_device_constant DeviceString u_point_random = {5632627207092325544ull};
+/* "geom:normal_map_normal" */
+ccl_device_constant DeviceString u_normal_map_normal = {10718948685686827073};
+/* "path:ray_length" */
+ccl_device_constant DeviceString u_path_ray_length = {16391985802412544524ull};
+/* "path:ray_depth" */
+ccl_device_constant DeviceString u_path_ray_depth = {16643933224879500399ull};
+/* "path:diffuse_depth" */
+ccl_device_constant DeviceString u_path_diffuse_depth = {13191651286699118408ull};
+/* "path:glossy_depth" */
+ccl_device_constant DeviceString u_path_glossy_depth = {15717768399057252940ull};
+/* "path:transparent_depth" */
+ccl_device_constant DeviceString u_path_transparent_depth = {7821650266475578543ull};
+/* "path:transmission_depth" */
+ccl_device_constant DeviceString u_path_transmission_depth = {15113408892323917624ull};
+
+} // namespace DeviceStrings
+
+/* Closure */
+
+ccl_device_extern ccl_private OSLClosure *osl_mul_closure_color(ccl_private ShaderGlobals *sg,
+ ccl_private OSLClosure *a,
+ ccl_private const float3 *weight)
+{
+ if (*weight == zero_float3() || !a) {
+ return nullptr;
+ }
+ else if (*weight == one_float3()) {
+ return a;
+ }
+
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+
+ ccl_private uint8_t *closure_pool = sd->osl_closure_pool;
+ /* Align pointer to closure struct requirement */
+ closure_pool = reinterpret_cast<uint8_t *>(
+ (reinterpret_cast<size_t>(closure_pool) + alignof(OSLClosureMul) - 1) &
+ (-alignof(OSLClosureMul)));
+ sd->osl_closure_pool = closure_pool + sizeof(OSLClosureMul);
+
+ ccl_private OSLClosureMul *const closure = reinterpret_cast<ccl_private OSLClosureMul *>(
+ closure_pool);
+ closure->id = OSL_CLOSURE_MUL_ID;
+ closure->weight = *weight;
+ closure->closure = a;
+
+ return closure;
+}
+
+ccl_device_extern ccl_private OSLClosure *osl_mul_closure_float(ccl_private ShaderGlobals *sg,
+ ccl_private OSLClosure *a,
+ float weight)
+{
+ if (weight == 0.0f || !a) {
+ return nullptr;
+ }
+ else if (weight == 1.0f) {
+ return a;
+ }
+
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+
+ uint8_t *closure_pool = sd->osl_closure_pool;
+ /* Align pointer to closure struct requirement */
+ closure_pool = reinterpret_cast<uint8_t *>(
+ (reinterpret_cast<size_t>(closure_pool) + alignof(OSLClosureMul) - 1) &
+ (-alignof(OSLClosureMul)));
+ sd->osl_closure_pool = closure_pool + sizeof(OSLClosureMul);
+
+ ccl_private OSLClosureMul *const closure = reinterpret_cast<ccl_private OSLClosureMul *>(
+ closure_pool);
+ closure->id = OSL_CLOSURE_MUL_ID;
+ closure->weight = make_float3(weight, weight, weight);
+ closure->closure = a;
+
+ return closure;
+}
+
+ccl_device_extern ccl_private OSLClosure *osl_add_closure_closure(ccl_private ShaderGlobals *sg,
+ ccl_private OSLClosure *a,
+ ccl_private OSLClosure *b)
+{
+ if (!a) {
+ return b;
+ }
+ if (!b) {
+ return a;
+ }
+
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+
+ ccl_private uint8_t *closure_pool = sd->osl_closure_pool;
+ /* Align pointer to closure struct requirement */
+ closure_pool = reinterpret_cast<uint8_t *>(
+ (reinterpret_cast<size_t>(closure_pool) + alignof(OSLClosureAdd) - 1) &
+ (-alignof(OSLClosureAdd)));
+ sd->osl_closure_pool = closure_pool + sizeof(OSLClosureAdd);
+
+ ccl_private OSLClosureAdd *const closure = reinterpret_cast<ccl_private OSLClosureAdd *>(
+ closure_pool);
+ closure->id = OSL_CLOSURE_ADD_ID;
+ closure->closureA = a;
+ closure->closureB = b;
+
+ return closure;
+}
+
+ccl_device_extern ccl_private OSLClosure *osl_allocate_closure_component(
+ ccl_private ShaderGlobals *sg, int id, int size)
+{
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+
+ ccl_private uint8_t *closure_pool = sd->osl_closure_pool;
+ /* Align pointer to closure struct requirement */
+ closure_pool = reinterpret_cast<uint8_t *>(
+ (reinterpret_cast<size_t>(closure_pool) + alignof(OSLClosureComponent) - 1) &
+ (-alignof(OSLClosureComponent)));
+ sd->osl_closure_pool = closure_pool + sizeof(OSLClosureComponent) + size;
+
+ ccl_private OSLClosureComponent *const closure =
+ reinterpret_cast<ccl_private OSLClosureComponent *>(closure_pool);
+ closure->id = static_cast<OSLClosureType>(id);
+ closure->weight = one_float3();
+
+ return closure;
+}
+
+ccl_device_extern ccl_private OSLClosure *osl_allocate_weighted_closure_component(
+ ccl_private ShaderGlobals *sg, int id, int size, ccl_private const float3 *weight)
+{
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+
+ ccl_private uint8_t *closure_pool = sd->osl_closure_pool;
+ /* Align pointer to closure struct requirement */
+ closure_pool = reinterpret_cast<uint8_t *>(
+ (reinterpret_cast<size_t>(closure_pool) + alignof(OSLClosureComponent) - 1) &
+ (-alignof(OSLClosureComponent)));
+ sd->osl_closure_pool = closure_pool + sizeof(OSLClosureComponent) + size;
+
+ ccl_private OSLClosureComponent *const closure =
+ reinterpret_cast<ccl_private OSLClosureComponent *>(closure_pool);
+ closure->id = static_cast<OSLClosureType>(id);
+ closure->weight = *weight;
+
+ return closure;
+}
+
+/* Utilities */
+
+#include "kernel/svm/math_util.h"
+#include "kernel/util/color.h"
+
+ccl_device_extern void osl_error(ccl_private ShaderGlobals *sg, const char *format, void *args)
+{
+}
+
+ccl_device_extern void osl_printf(ccl_private ShaderGlobals *sg, const char *format, void *args)
+{
+}
+
+ccl_device_extern void osl_warning(ccl_private ShaderGlobals *sg, const char *format, void *args)
+{
+}
+
+ccl_device_extern uint osl_range_check(int indexvalue,
+ int length,
+ DeviceString symname,
+ ccl_private ShaderGlobals *sg,
+ DeviceString sourcefile,
+ int sourceline,
+ DeviceString groupname,
+ int layer,
+ DeviceString layername,
+ DeviceString shadername)
+{
+ const int result = indexvalue < 0 ? 0 : indexvalue >= length ? length - 1 : indexvalue;
+#if 0
+ if (result != indexvalue) {
+ printf("Index [%d] out of range\n", indexvalue);
+ }
+#endif
+ return result;
+}
+
+ccl_device_extern uint osl_range_check_err(int indexvalue,
+ int length,
+ DeviceString symname,
+ ccl_private ShaderGlobals *sg,
+ DeviceString sourcefile,
+ int sourceline,
+ DeviceString groupname,
+ int layer,
+ DeviceString layername,
+ DeviceString shadername)
+{
+ return osl_range_check(indexvalue,
+ length,
+ symname,
+ sg,
+ sourcefile,
+ sourceline,
+ groupname,
+ layer,
+ layername,
+ shadername);
+}
+
+/* Color Utilities */
+
+ccl_device_extern void osl_blackbody_vf(ccl_private ShaderGlobals *sg,
+ ccl_private float3 *result,
+ float temperature)
+{
+ float3 color_rgb = rec709_to_rgb(nullptr, svm_math_blackbody_color_rec709(temperature));
+ color_rgb = max(color_rgb, zero_float3());
+ *result = color_rgb;
+}
+
+#if 0
+ccl_device_extern void osl_wavelength_color_vf(ccl_private ShaderGlobals *sg,
+ ccl_private float3 *result,
+ float wavelength)
+{
+}
+#endif
+
+ccl_device_extern void osl_luminance_fv(ccl_private ShaderGlobals *sg,
+ ccl_private float *result,
+ ccl_private float3 *color)
+{
+ *result = linear_rgb_to_gray(nullptr, *color);
+}
+
+ccl_device_extern void osl_luminance_dfdv(ccl_private ShaderGlobals *sg,
+ ccl_private float *result,
+ ccl_private float3 *color)
+{
+ for (int i = 0; i < 3; ++i) {
+ osl_luminance_fv(sg, result + i, color + i);
+ }
+}
+
+ccl_device_extern void osl_prepend_color_from(ccl_private ShaderGlobals *sg,
+ ccl_private float3 *res,
+ DeviceString from)
+{
+ if (from == DeviceStrings::u_hsv) {
+ *res = hsv_to_rgb(*res);
+ }
+ else if (from == DeviceStrings::u_hsl) {
+ *res = hsl_to_rgb(*res);
+ }
+ else if (from == DeviceStrings::u_xyz) {
+ *res = xyz_to_rgb(nullptr, *res);
+ }
+ else if (from == DeviceStrings::u_xyy) {
+ *res = xyz_to_rgb(nullptr, xyY_to_xyz(res->x, res->y, res->z));
+ }
+}
+
+ccl_device_extern bool osl_transformc(ccl_private ShaderGlobals *sg,
+ ccl_private float3 *c_in,
+ int c_in_derivs,
+ ccl_private float3 *c_out,
+ int c_out_derivs,
+ DeviceString from,
+ DeviceString to)
+{
+ if (!c_out_derivs) {
+ c_in_derivs = false;
+ }
+ else if (!c_in_derivs) {
+ c_out[1] = zero_float3();
+ c_out[2] = zero_float3();
+ }
+
+ float3 rgb;
+
+ for (int i = 0; i < (c_in_derivs ? 3 : 1); ++i) {
+ if (from == DeviceStrings::u_hsv) {
+ rgb = hsv_to_rgb(c_in[i]);
+ }
+ else if (from == DeviceStrings::u_hsl) {
+ rgb = hsl_to_rgb(c_in[i]);
+ }
+ else if (from == DeviceStrings::u_xyz) {
+ rgb = xyz_to_rgb(nullptr, c_in[i]);
+ }
+ else if (from == DeviceStrings::u_xyy) {
+ rgb = xyz_to_rgb(nullptr, xyY_to_xyz(c_in[i].x, c_in[i].y, c_in[i].z));
+ }
+ else if (from == DeviceStrings::u_srgb) {
+ rgb = color_srgb_to_linear_v3(c_in[i]);
+ }
+ else {
+ rgb = c_in[i];
+ }
+
+ if (to == DeviceStrings::u_hsv) {
+ c_out[i] = rgb_to_hsv(rgb);
+ }
+ else if (to == DeviceStrings::u_hsl) {
+ c_out[i] = rgb_to_hsl(rgb);
+ }
+#if 0
+ else if (to == DeviceStrings::u_xyz) {
+ c_out[i] = rgb_to_xyz(nullptr, rgb);
+ }
+ else if (to == DeviceStrings::u_xyy) {
+ c_out[i] = xyz_to_xyY(rgb_to_xyz(nullptr, rgb));
+ }
+#endif
+ else if (to == DeviceStrings::u_srgb) {
+ c_out[i] = color_linear_to_srgb_v3(rgb);
+ }
+ else {
+ c_out[i] = rgb;
+ }
+ }
+
+ return true;
+}
+
+/* Matrix Utilities */
+
+#include "kernel/geom/object.h"
+#include "util/transform.h"
+
+ccl_device_forceinline void copy_matrix(ccl_private float *res, const Transform &tfm)
+{
+ res[0] = tfm.x.x;
+ res[1] = tfm.y.x;
+ res[2] = tfm.z.x;
+ res[3] = 0.0f;
+ res[4] = tfm.x.y;
+ res[5] = tfm.y.y;
+ res[6] = tfm.z.y;
+ res[7] = 0.0f;
+ res[8] = tfm.x.z;
+ res[9] = tfm.y.z;
+ res[10] = tfm.z.z;
+ res[11] = 0.0f;
+ res[12] = tfm.x.w;
+ res[13] = tfm.y.w;
+ res[14] = tfm.z.w;
+ res[15] = 1.0f;
+}
+ccl_device_forceinline void copy_matrix(ccl_private float *res, const ProjectionTransform &tfm)
+{
+ res[0] = tfm.x.x;
+ res[1] = tfm.y.x;
+ res[2] = tfm.z.x;
+ res[3] = tfm.w.x;
+ res[4] = tfm.x.y;
+ res[5] = tfm.y.y;
+ res[6] = tfm.z.y;
+ res[7] = tfm.w.y;
+ res[8] = tfm.x.z;
+ res[9] = tfm.y.z;
+ res[10] = tfm.z.z;
+ res[11] = tfm.w.z;
+ res[12] = tfm.x.w;
+ res[13] = tfm.y.w;
+ res[14] = tfm.z.w;
+ res[15] = tfm.w.w;
+}
+ccl_device_forceinline void copy_identity_matrix(ccl_private float *res, float value = 1.0f)
+{
+ res[0] = value;
+ res[1] = 0.0f;
+ res[2] = 0.0f;
+ res[3] = 0.0f;
+ res[4] = 0.0f;
+ res[5] = value;
+ res[6] = 0.0f;
+ res[7] = 0.0f;
+ res[8] = 0.0f;
+ res[9] = 0.0f;
+ res[10] = value;
+ res[11] = 0.0f;
+ res[12] = 0.0f;
+ res[13] = 0.0f;
+ res[14] = 0.0f;
+ res[15] = value;
+}
+ccl_device_forceinline Transform convert_transform(ccl_private const float *m)
+{
+ return make_transform(
+ m[0], m[4], m[8], m[12], m[1], m[5], m[9], m[13], m[2], m[6], m[10], m[14]);
+}
+
+ccl_device_extern void osl_mul_mmm(ccl_private float *res,
+ ccl_private const float *a,
+ ccl_private const float *b)
+{
+ const Transform tfm_a = convert_transform(a);
+ const Transform tfm_b = convert_transform(b);
+ copy_matrix(res, tfm_a * tfm_b);
+}
+
+ccl_device_extern void osl_mul_mmf(ccl_private float *res, ccl_private const float *a, float b)
+{
+ for (int i = 0; i < 16; ++i) {
+ res[i] = a[i] * b;
+ }
+}
+
+ccl_device_extern void osl_div_mmm(ccl_private float *res,
+ ccl_private const float *a,
+ ccl_private const float *b)
+{
+ const Transform tfm_a = convert_transform(a);
+ const Transform tfm_b = convert_transform(b);
+ copy_matrix(res, tfm_a * transform_inverse(tfm_b));
+}
+
+ccl_device_extern void osl_div_mmf(ccl_private float *res, ccl_private const float *a, float b)
+{
+ for (int i = 0; i < 16; ++i) {
+ res[i] = a[i] / b;
+ }
+}
+
+ccl_device_extern void osl_div_mfm(ccl_private float *res, float a, ccl_private const float *b)
+{
+ const Transform tfm_b = convert_transform(b);
+ copy_matrix(res, transform_inverse(tfm_b));
+ for (int i = 0; i < 16; ++i) {
+ res[i] *= a;
+ }
+}
+
+ccl_device_extern void osl_div_m_ff(ccl_private float *res, float a, float b)
+{
+ float f = (b == 0) ? 0.0f : (a / b);
+ copy_identity_matrix(res, f);
+}
+
+ccl_device_extern void osl_transform_vmv(ccl_private float3 *res,
+ ccl_private const float *m,
+ ccl_private const float3 *v)
+{
+ const Transform tfm_m = convert_transform(m);
+ *res = transform_point(&tfm_m, *v);
+}
+
+ccl_device_extern void osl_transform_dvmdv(ccl_private float3 *res,
+ ccl_private const float *m,
+ ccl_private const float3 *v)
+{
+ for (int i = 0; i < 3; ++i) {
+ const Transform tfm_m = convert_transform(m + i * 16);
+ res[i] = transform_point(&tfm_m, v[i]);
+ }
+}
+
+ccl_device_extern void osl_transformv_vmv(ccl_private float3 *res,
+ ccl_private const float *m,
+ ccl_private const float3 *v)
+{
+ const Transform tfm_m = convert_transform(m);
+ *res = transform_direction(&tfm_m, *v);
+}
+
+ccl_device_extern void osl_transformv_dvmdv(ccl_private float3 *res,
+ ccl_private const float *m,
+ ccl_private const float3 *v)
+{
+ for (int i = 0; i < 3; ++i) {
+ const Transform tfm_m = convert_transform(m + i * 16);
+ res[i] = transform_direction(&tfm_m, v[i]);
+ }
+}
+
+ccl_device_extern void osl_transformn_vmv(ccl_private float3 *res,
+ ccl_private const float *m,
+ ccl_private const float3 *v)
+{
+ const Transform tfm_m = convert_transform(m);
+ *res = transform_direction(&tfm_m, *v);
+}
+
+ccl_device_extern void osl_transformn_dvmdv(ccl_private float3 *res,
+ ccl_private const float *m,
+ ccl_private const float3 *v)
+{
+ for (int i = 0; i < 3; ++i) {
+ const Transform tfm_m = convert_transform(m + i * 16);
+ res[i] = transform_direction(&tfm_m, v[i]);
+ }
+}
+
+ccl_device_extern bool osl_get_matrix(ccl_private ShaderGlobals *sg,
+ ccl_private float *res,
+ DeviceString from)
+{
+ if (from == DeviceStrings::u_common || from == DeviceStrings::u_world) {
+ copy_identity_matrix(res);
+ return true;
+ }
+ if (from == DeviceStrings::u_shader || from == DeviceStrings::u_object) {
+ KernelGlobals kg = nullptr;
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+ int object = sd->object;
+
+ if (object != OBJECT_NONE) {
+ const Transform tfm = object_get_transform(kg, sd);
+ copy_matrix(res, tfm);
+ return true;
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ const Transform tfm = lamp_fetch_transform(kg, sd->lamp, false);
+ copy_matrix(res, tfm);
+ return true;
+ }
+ }
+ else if (from == DeviceStrings::u_ndc) {
+ copy_matrix(res, kernel_data.cam.ndctoworld);
+ return true;
+ }
+ else if (from == DeviceStrings::u_raster) {
+ copy_matrix(res, kernel_data.cam.rastertoworld);
+ return true;
+ }
+ else if (from == DeviceStrings::u_screen) {
+ copy_matrix(res, kernel_data.cam.screentoworld);
+ return true;
+ }
+ else if (from == DeviceStrings::u_camera) {
+ copy_matrix(res, kernel_data.cam.cameratoworld);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device_extern bool osl_get_inverse_matrix(ccl_private ShaderGlobals *sg,
+ ccl_private float *res,
+ DeviceString to)
+{
+ if (to == DeviceStrings::u_common || to == DeviceStrings::u_world) {
+ copy_identity_matrix(res);
+ return true;
+ }
+ if (to == DeviceStrings::u_shader || to == DeviceStrings::u_object) {
+ KernelGlobals kg = nullptr;
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+ int object = sd->object;
+
+ if (object != OBJECT_NONE) {
+ const Transform itfm = object_get_inverse_transform(kg, sd);
+ copy_matrix(res, itfm);
+ return true;
+ }
+ else if (sd->type == PRIMITIVE_LAMP) {
+ const Transform itfm = lamp_fetch_transform(kg, sd->lamp, true);
+ copy_matrix(res, itfm);
+ return true;
+ }
+ }
+ else if (to == DeviceStrings::u_ndc) {
+ copy_matrix(res, kernel_data.cam.worldtondc);
+ return true;
+ }
+ else if (to == DeviceStrings::u_raster) {
+ copy_matrix(res, kernel_data.cam.worldtoraster);
+ return true;
+ }
+ else if (to == DeviceStrings::u_screen) {
+ copy_matrix(res, kernel_data.cam.worldtoscreen);
+ return true;
+ }
+ else if (to == DeviceStrings::u_camera) {
+ copy_matrix(res, kernel_data.cam.worldtocamera);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device_extern bool osl_prepend_matrix_from(ccl_private ShaderGlobals *sg,
+ ccl_private float *res,
+ DeviceString from)
+{
+ float m_from[16];
+ if (osl_get_matrix(sg, m_from, from)) {
+ osl_mul_mmm(res, m_from, res);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device_extern bool osl_get_from_to_matrix(ccl_private ShaderGlobals *sg,
+ ccl_private float *res,
+ DeviceString from,
+ DeviceString to)
+{
+ float m_from[16], m_to[16];
+ if (osl_get_matrix(sg, m_from, from) && osl_get_inverse_matrix(sg, m_to, to)) {
+ osl_mul_mmm(res, m_from, m_to);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device_extern bool osl_transform_triple(ccl_private ShaderGlobals *sg,
+ ccl_private float3 *p_in,
+ int p_in_derivs,
+ ccl_private float3 *p_out,
+ int p_out_derivs,
+ DeviceString from,
+ DeviceString to,
+ int vectype)
+{
+ if (!p_out_derivs) {
+ p_in_derivs = false;
+ }
+ else if (!p_in_derivs) {
+ p_out[1] = zero_float3();
+ p_out[2] = zero_float3();
+ }
+
+ bool res;
+ float m[16];
+
+ if (from == DeviceStrings::u_common) {
+ res = osl_get_inverse_matrix(sg, m, to);
+ }
+ else if (to == DeviceStrings::u_common) {
+ res = osl_get_matrix(sg, m, from);
+ }
+ else {
+ res = osl_get_from_to_matrix(sg, m, from, to);
+ }
+
+ if (res) {
+ if (vectype == 2 /* TypeDesc::POINT */) {
+ if (p_in_derivs)
+ osl_transform_dvmdv(p_out, m, p_in);
+ else
+ osl_transform_vmv(p_out, m, p_in);
+ }
+ else if (vectype == 3 /* TypeDesc::VECTOR */) {
+ if (p_in_derivs)
+ osl_transformv_dvmdv(p_out, m, p_in);
+ else
+ osl_transformv_vmv(p_out, m, p_in);
+ }
+ else if (vectype == 4 /* TypeDesc::NORMAL */) {
+ if (p_in_derivs)
+ osl_transformn_dvmdv(p_out, m, p_in);
+ else
+ osl_transformn_vmv(p_out, m, p_in);
+ }
+ else {
+ res = false;
+ }
+ }
+ else {
+ p_out[0] = p_in[0];
+ if (p_in_derivs) {
+ p_out[1] = p_in[1];
+ p_out[2] = p_in[2];
+ }
+ }
+
+ return res;
+}
+
+ccl_device_extern bool osl_transform_triple_nonlinear(ccl_private ShaderGlobals *sg,
+ ccl_private float3 *p_in,
+ int p_in_derivs,
+ ccl_private float3 *p_out,
+ int p_out_derivs,
+ DeviceString from,
+ DeviceString to,
+ int vectype)
+{
+ return osl_transform_triple(sg, p_in, p_in_derivs, p_out, p_out_derivs, from, to, vectype);
+}
+
+ccl_device_extern void osl_transpose_mm(ccl_private float *res, ccl_private const float *m)
+{
+ copy_matrix(res, *reinterpret_cast<ccl_private const ProjectionTransform *>(m));
+}
+
+#if 0
+ccl_device_extern float osl_determinant_fm(ccl_private const float *m)
+{
+}
+#endif
+
+/* Attributes */
+
+#include "kernel/geom/geom.h"
+
+typedef long long TypeDesc;
+
+ccl_device_inline bool set_attribute_float(ccl_private float fval[3],
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ const unsigned char type_basetype = type & 0xF;
+ const unsigned char type_aggregate = (type >> 8) & 0xF;
+ const int type_arraylen = type >> 32;
+
+ if (type_basetype == 11 /* TypeDesc::FLOAT */) {
+ if ((type_aggregate == 2 /* TypeDesc::VEC2 */) ||
+ (type_aggregate == 1 && type_arraylen == 2)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 2 + 0] = fval[i];
+ static_cast<ccl_private float *>(val)[i * 2 + 1] = fval[i];
+ }
+ return true;
+ }
+ if ((type_aggregate == 3 /* TypeDesc::VEC3 */) ||
+ (type_aggregate == 1 && type_arraylen == 3)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 3 + 0] = fval[i];
+ static_cast<ccl_private float *>(val)[i * 3 + 1] = fval[i];
+ static_cast<ccl_private float *>(val)[i * 3 + 2] = fval[i];
+ }
+ return true;
+ }
+ if ((type_aggregate == 4 /* TypeDesc::VEC4 */) ||
+ (type_aggregate == 1 && type_arraylen == 4)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 4 + 0] = fval[i];
+ static_cast<ccl_private float *>(val)[i * 4 + 1] = fval[i];
+ static_cast<ccl_private float *>(val)[i * 4 + 2] = fval[i];
+ static_cast<ccl_private float *>(val)[i * 4 + 3] = 1.0f;
+ }
+ return true;
+ }
+ if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i] = fval[i];
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+ccl_device_inline bool set_attribute_float(float f,
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ float fv[3];
+
+ fv[0] = f;
+ fv[1] = 0.0f;
+ fv[2] = 0.0f;
+
+ return set_attribute_float(fv, type, derivatives, val);
+}
+ccl_device_inline bool set_attribute_float2(ccl_private float2 fval[3],
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ const unsigned char type_basetype = type & 0xF;
+ const unsigned char type_aggregate = (type >> 8) & 0xF;
+ const int type_arraylen = type >> 32;
+
+ if (type_basetype == 11 /* TypeDesc::FLOAT */) {
+ if ((type_aggregate == 2 /* TypeDesc::VEC2 */) ||
+ (type_aggregate == 1 && type_arraylen == 2)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 2 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 2 + 1] = fval[i].y;
+ }
+ return true;
+ }
+ if ((type_aggregate == 3 /* TypeDesc::VEC3 */) ||
+ (type_aggregate == 1 && type_arraylen == 3)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 3 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 3 + 1] = fval[i].y;
+ static_cast<ccl_private float *>(val)[i * 3 + 2] = 0.0f;
+ }
+ return true;
+ }
+ if ((type_aggregate == 4 /* TypeDesc::VEC4 */) ||
+ (type_aggregate == 1 && type_arraylen == 4)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 4 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 4 + 1] = fval[i].y;
+ static_cast<ccl_private float *>(val)[i * 4 + 2] = 0.0f;
+ static_cast<ccl_private float *>(val)[i * 4 + 3] = 1.0f;
+ }
+ return true;
+ }
+ if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i] = fval[i].x;
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+ccl_device_inline bool set_attribute_float3(ccl_private float3 fval[3],
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ const unsigned char type_basetype = type & 0xF;
+ const unsigned char type_aggregate = (type >> 8) & 0xF;
+ const int type_arraylen = type >> 32;
+
+ if (type_basetype == 11 /* TypeDesc::FLOAT */) {
+ if ((type_aggregate == 3 /* TypeDesc::VEC3 */) ||
+ (type_aggregate == 1 && type_arraylen == 3)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 3 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 3 + 1] = fval[i].y;
+ static_cast<ccl_private float *>(val)[i * 3 + 2] = fval[i].z;
+ }
+ return true;
+ }
+ if ((type_aggregate == 4 /* TypeDesc::VEC4 */) ||
+ (type_aggregate == 1 && type_arraylen == 4)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 4 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 4 + 1] = fval[i].y;
+ static_cast<ccl_private float *>(val)[i * 4 + 2] = fval[i].z;
+ static_cast<ccl_private float *>(val)[i * 4 + 3] = 1.0f;
+ }
+ return true;
+ }
+ if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i] = average(fval[i]);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+ccl_device_inline bool set_attribute_float3(float3 f,
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ float3 fv[3];
+
+ fv[0] = f;
+ fv[1] = make_float3(0.0f, 0.0f, 0.0f);
+ fv[2] = make_float3(0.0f, 0.0f, 0.0f);
+
+ return set_attribute_float3(fv, type, derivatives, val);
+}
+ccl_device_inline bool set_attribute_float4(ccl_private float4 fval[3],
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ const unsigned char type_basetype = type & 0xF;
+ const unsigned char type_aggregate = (type >> 8) & 0xF;
+ const int type_arraylen = type >> 32;
+
+ if (type_basetype == 11 /* TypeDesc::FLOAT */) {
+ if ((type_aggregate == 3 /* TypeDesc::VEC3 */) ||
+ (type_aggregate == 1 && type_arraylen == 3)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 3 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 3 + 1] = fval[i].y;
+ static_cast<ccl_private float *>(val)[i * 3 + 2] = fval[i].z;
+ }
+ return true;
+ }
+ if ((type_aggregate == 4 /* TypeDesc::VEC4 */) ||
+ (type_aggregate == 1 && type_arraylen == 4)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i * 4 + 0] = fval[i].x;
+ static_cast<ccl_private float *>(val)[i * 4 + 1] = fval[i].y;
+ static_cast<ccl_private float *>(val)[i * 4 + 2] = fval[i].z;
+ static_cast<ccl_private float *>(val)[i * 4 + 3] = fval[i].w;
+ }
+ return true;
+ }
+ if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
+ for (int i = 0; i < (derivatives ? 3 : 1); ++i) {
+ static_cast<ccl_private float *>(val)[i] = average(float4_to_float3(fval[i]));
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+ccl_device_inline bool set_attribute_matrix(ccl_private const Transform &tfm,
+ TypeDesc type,
+ ccl_private void *val)
+{
+ const unsigned char type_basetype = type & 0xF;
+ const unsigned char type_aggregate = (type >> 8) & 0xF;
+
+ if (type_basetype == 11 /* TypeDesc::FLOAT */ && type_aggregate == 16 /* TypeDesc::MATRIX44 */) {
+ copy_matrix(static_cast<ccl_private float *>(val), tfm);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device_inline bool get_background_attribute(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ DeviceString name,
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ if (name == DeviceStrings::u_path_ray_length) {
+ /* Ray Length */
+ float f = sd->ray_length;
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+ return false;
+}
+
+ccl_device_inline bool get_object_attribute(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ const AttributeDescriptor &desc,
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ if (desc.type == NODE_ATTR_FLOAT) {
+ float fval[3];
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc))
+ fval[0] = primitive_volume_attribute_float(kg, sd, desc);
+ else
+#endif
+ fval[0] = primitive_surface_attribute_float(
+ kg, sd, desc, derivatives ? &fval[1] : nullptr, derivatives ? &fval[2] : nullptr);
+ return set_attribute_float(fval, type, derivatives, val);
+ }
+ else if (desc.type == NODE_ATTR_FLOAT2) {
+ float2 fval[3];
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc))
+ return false;
+ else
+#endif
+ fval[0] = primitive_surface_attribute_float2(
+ kg, sd, desc, derivatives ? &fval[1] : nullptr, derivatives ? &fval[2] : nullptr);
+ return set_attribute_float2(fval, type, derivatives, val);
+ }
+ else if (desc.type == NODE_ATTR_FLOAT3) {
+ float3 fval[3];
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc))
+ fval[0] = primitive_volume_attribute_float3(kg, sd, desc);
+ else
+#endif
+ fval[0] = primitive_surface_attribute_float3(
+ kg, sd, desc, derivatives ? &fval[1] : nullptr, derivatives ? &fval[2] : nullptr);
+ return set_attribute_float3(fval, type, derivatives, val);
+ }
+ else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) {
+ float4 fval[3];
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc))
+ fval[0] = primitive_volume_attribute_float4(kg, sd, desc);
+ else
+#endif
+ fval[0] = primitive_surface_attribute_float4(
+ kg, sd, desc, derivatives ? &fval[1] : nullptr, derivatives ? &fval[2] : nullptr);
+ return set_attribute_float4(fval, type, derivatives, val);
+ }
+ else if (desc.type == NODE_ATTR_MATRIX) {
+ Transform tfm = primitive_attribute_matrix(kg, desc);
+ return set_attribute_matrix(tfm, type, val);
+ }
+
+ return false;
+}
+
+ccl_device_inline bool get_object_standard_attribute(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ DeviceString name,
+ TypeDesc type,
+ bool derivatives,
+ ccl_private void *val)
+{
+ /* Object attributes */
+ if (name == DeviceStrings::u_object_location) {
+ float3 f = object_location(kg, sd);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_object_color) {
+ float3 f = object_color(kg, sd->object);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_object_alpha) {
+ float f = object_alpha(kg, sd->object);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_object_index) {
+ float f = object_pass_id(kg, sd->object);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_geom_dupli_generated) {
+ float3 f = object_dupli_generated(kg, sd->object);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_geom_dupli_uv) {
+ float3 f = object_dupli_uv(kg, sd->object);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_material_index) {
+ float f = shader_pass_id(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_object_random) {
+ float f = object_random_number(kg, sd->object);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+ /* Particle attributes */
+ else if (name == DeviceStrings::u_particle_index) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_index(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_particle_random) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = hash_uint2_to_float(particle_index(kg, particle_id), 0);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+ else if (name == DeviceStrings::u_particle_age) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_age(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_particle_lifetime) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_lifetime(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_particle_location) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float3 f = particle_location(kg, particle_id);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+#if 0 /* unsupported */
+ else if (name == DeviceStrings::u_particle_rotation) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float4 f = particle_rotation(kg, particle_id);
+ return set_attribute_float4(f, type, derivatives, val);
+ }
+#endif
+ else if (name == DeviceStrings::u_particle_size) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_size(kg, particle_id);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_particle_velocity) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float3 f = particle_velocity(kg, particle_id);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_particle_angular_velocity) {
+ int particle_id = object_particle_id(kg, sd->object);
+ float3 f = particle_angular_velocity(kg, particle_id);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+
+ /* Geometry attributes */
+#if 0 /* TODO */
+ else if (name == DeviceStrings::u_geom_numpolyvertices) {
+ return false;
+ }
+ else if (name == DeviceStrings::u_geom_trianglevertices ||
+ name == DeviceStrings::u_geom_polyvertices) {
+ return false;
+ }
+ else if (name == DeviceStrings::u_geom_name) {
+ return false;
+ }
+#endif
+ else if (name == DeviceStrings::u_is_smooth) {
+ float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+
+#ifdef __HAIR__
+ /* Hair attributes */
+ else if (name == DeviceStrings::u_is_curve) {
+ float f = (sd->type & PRIMITIVE_CURVE) != 0;
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_curve_thickness) {
+ float f = curve_thickness(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_curve_tangent_normal) {
+ float3 f = curve_tangent_normal(kg, sd);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_curve_random) {
+ float f = curve_random(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+#endif
+
+#ifdef __POINTCLOUD__
+ /* Point attributes */
+ else if (name == DeviceStrings::u_is_point) {
+ float f = (sd->type & PRIMITIVE_POINT) != 0;
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_point_radius) {
+ float f = point_radius(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_point_position) {
+ float3 f = point_position(kg, sd);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else if (name == DeviceStrings::u_point_random) {
+ float f = point_random(kg, sd);
+ return set_attribute_float(f, type, derivatives, val);
+ }
+#endif
+
+ else if (name == DeviceStrings::u_normal_map_normal) {
+ if (sd->type & PRIMITIVE_TRIANGLE) {
+ float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v);
+ return set_attribute_float3(f, type, derivatives, val);
+ }
+ else {
+ return false;
+ }
+ }
+
+ return get_background_attribute(kg, sd, name, type, derivatives, val);
+}
+
+ccl_device_extern bool osl_get_attribute(ccl_private ShaderGlobals *sg,
+ int derivatives,
+ DeviceString object_name,
+ DeviceString name,
+ int array_lookup,
+ int index,
+ TypeDesc type,
+ ccl_private void *res)
+{
+ KernelGlobals kg = nullptr;
+ ccl_private ShaderData *const sd = static_cast<ccl_private ShaderData *>(sg->renderstate);
+ int object;
+
+ if (object_name != DeviceStrings::_emptystring_) {
+ /* TODO: Get object index from name */
+ return false;
+ }
+ else {
+ object = sd->object;
+ }
+
+ const uint64_t id = name.hash();
+
+ const AttributeDescriptor desc = find_attribute(kg, object, sd->prim, sd->type, id);
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ return get_object_attribute(kg, sd, desc, type, derivatives, res);
+ }
+ else {
+ return get_object_standard_attribute(kg, sd, name, type, derivatives, res);
+ }
+}
+
+#if 0
+ccl_device_extern bool osl_bind_interpolated_param(ccl_private ShaderGlobals *sg,
+ DeviceString name,
+ long long type,
+ int userdata_has_derivs,
+ ccl_private void *userdata_data,
+ int symbol_has_derivs,
+ ccl_private void *symbol_data,
+ int symbol_data_size,
+ ccl_private void *userdata_initialized,
+ int userdata_index)
+{
+ return false;
+}
+#endif
+
+/* Noise */
+
+#include "kernel/svm/noise.h"
+#include "util/hash.h"
+
+ccl_device_extern uint osl_hash_ii(int x)
+{
+ return hash_uint(x);
+}
+
+ccl_device_extern uint osl_hash_if(float x)
+{
+ return hash_uint(__float_as_uint(x));
+}
+
+ccl_device_extern uint osl_hash_iff(float x, float y)
+{
+ return hash_uint2(__float_as_uint(x), __float_as_uint(y));
+}
+
+ccl_device_extern uint osl_hash_iv(ccl_private const float3 *v)
+{
+ return hash_uint3(__float_as_uint(v->x), __float_as_uint(v->y), __float_as_uint(v->z));
+}
+
+ccl_device_extern uint osl_hash_ivf(ccl_private const float3 *v, float w)
+{
+ return hash_uint4(
+ __float_as_uint(v->x), __float_as_uint(v->y), __float_as_uint(v->z), __float_as_uint(w));
+}
+
+ccl_device_extern OSLNoiseOptions *osl_get_noise_options(ccl_private ShaderGlobals *sg)
+{
+ return nullptr;
+}
+
+ccl_device_extern void osl_noiseparams_set_anisotropic(ccl_private OSLNoiseOptions *opt,
+ int anisotropic)
+{
+}
+
+ccl_device_extern void osl_noiseparams_set_do_filter(ccl_private OSLNoiseOptions *opt,
+ int do_filter)
+{
+}
+
+ccl_device_extern void osl_noiseparams_set_direction(ccl_private OSLNoiseOptions *opt,
+ float3 *direction)
+{
+}
+
+ccl_device_extern void osl_noiseparams_set_bandwidth(ccl_private OSLNoiseOptions *opt,
+ float bandwidth)
+{
+}
+
+ccl_device_extern void osl_noiseparams_set_impulses(ccl_private OSLNoiseOptions *opt,
+ float impulses)
+{
+}
+
+#define OSL_NOISE_IMPL(name, op) \
+ ccl_device_extern float name##_ff(float x) \
+ { \
+ return op##_1d(x); \
+ } \
+ ccl_device_extern float name##_fff(float x, float y) \
+ { \
+ return op##_2d(make_float2(x, y)); \
+ } \
+ ccl_device_extern float name##_fv(ccl_private const float3 *v) \
+ { \
+ return op##_3d(*v); \
+ } \
+ ccl_device_extern float name##_fvf(ccl_private const float3 *v, float w) \
+ { \
+ return op##_4d(make_float4(v->x, v->y, v->z, w)); \
+ } \
+ ccl_device_extern void name##_vf(ccl_private float3 *res, float x) \
+ { \
+ /* TODO: This is not correct. Really need to change the hash function inside the noise \
+ * function to spit out a vector instead of a scalar. */ \
+ const float n = name##_ff(x); \
+ res->x = n; \
+ res->y = n; \
+ res->z = n; \
+ } \
+ ccl_device_extern void name##_vff(ccl_private float3 *res, float x, float y) \
+ { \
+ const float n = name##_fff(x, y); \
+ res->x = n; \
+ res->y = n; \
+ res->z = n; \
+ } \
+ ccl_device_extern void name##_vv(ccl_private float3 *res, const float3 *v) \
+ { \
+ const float n = name##_fv(v); \
+ res->x = n; \
+ res->y = n; \
+ res->z = n; \
+ } \
+ ccl_device_extern void name##_vvf(ccl_private float3 *res, const float3 *v, float w) \
+ { \
+ const float n = name##_fvf(v, w); \
+ res->x = n; \
+ res->y = n; \
+ res->z = n; \
+ }
+
+ccl_device_forceinline float hashnoise_1d(float p)
+{
+ const uint x = __float_as_uint(p);
+ return hash_uint(x) / static_cast<float>(~0u);
+}
+ccl_device_forceinline float hashnoise_2d(float2 p)
+{
+ const uint x = __float_as_uint(p.x);
+ const uint y = __float_as_uint(p.y);
+ return hash_uint2(x, y) / static_cast<float>(~0u);
+}
+ccl_device_forceinline float hashnoise_3d(float3 p)
+{
+ const uint x = __float_as_uint(p.x);
+ const uint y = __float_as_uint(p.y);
+ const uint z = __float_as_uint(p.z);
+ return hash_uint3(x, y, z) / static_cast<float>(~0u);
+}
+ccl_device_forceinline float hashnoise_4d(float4 p)
+{
+ const uint x = __float_as_uint(p.x);
+ const uint y = __float_as_uint(p.y);
+ const uint z = __float_as_uint(p.z);
+ const uint w = __float_as_uint(p.w);
+ return hash_uint4(x, y, z, w) / static_cast<float>(~0u);
+}
+
+/* TODO: Implement all noise functions */
+OSL_NOISE_IMPL(osl_hashnoise, hashnoise)
+OSL_NOISE_IMPL(osl_noise, noise)
+OSL_NOISE_IMPL(osl_snoise, snoise)
+
+/* Texturing */
+
+ccl_device_extern ccl_private OSLTextureOptions *osl_get_texture_options(
+ ccl_private ShaderGlobals *sg)
+{
+ return nullptr;
+}
+
+ccl_device_extern void osl_texture_set_firstchannel(ccl_private OSLTextureOptions *opt,
+ int firstchannel)
+{
+}
+
+ccl_device_extern void osl_texture_set_swrap_code(ccl_private OSLTextureOptions *opt, int mode)
+{
+}
+
+ccl_device_extern void osl_texture_set_twrap_code(ccl_private OSLTextureOptions *opt, int mode)
+{
+}
+
+ccl_device_extern void osl_texture_set_rwrap_code(ccl_private OSLTextureOptions *opt, int mode)
+{
+}
+
+ccl_device_extern void osl_texture_set_stwrap_code(ccl_private OSLTextureOptions *opt, int mode)
+{
+}
+
+ccl_device_extern void osl_texture_set_sblur(ccl_private OSLTextureOptions *opt, float blur)
+{
+}
+
+ccl_device_extern void osl_texture_set_tblur(ccl_private OSLTextureOptions *opt, float blur)
+{
+}
+
+ccl_device_extern void osl_texture_set_rblur(ccl_private OSLTextureOptions *opt, float blur)
+{
+}
+
+ccl_device_extern void osl_texture_set_stblur(ccl_private OSLTextureOptions *opt, float blur)
+{
+}
+
+ccl_device_extern void osl_texture_set_swidth(ccl_private OSLTextureOptions *opt, float width)
+{
+}
+
+ccl_device_extern void osl_texture_set_twidth(ccl_private OSLTextureOptions *opt, float width)
+{
+}
+
+ccl_device_extern void osl_texture_set_rwidth(ccl_private OSLTextureOptions *opt, float width)
+{
+}
+
+ccl_device_extern void osl_texture_set_stwidth(ccl_private OSLTextureOptions *opt, float width)
+{
+}
+
+ccl_device_extern void osl_texture_set_fill(ccl_private OSLTextureOptions *opt, float fill)
+{
+}
+
+ccl_device_extern void osl_texture_set_time(ccl_private OSLTextureOptions *opt, float time)
+{
+}
+
+ccl_device_extern void osl_texture_set_interp_code(ccl_private OSLTextureOptions *opt, int mode)
+{
+}
+
+ccl_device_extern void osl_texture_set_subimage(ccl_private OSLTextureOptions *opt, int subimage)
+{
+}
+
+ccl_device_extern void osl_texture_set_missingcolor_arena(ccl_private OSLTextureOptions *opt,
+ ccl_private float3 *color)
+{
+}
+
+ccl_device_extern void osl_texture_set_missingcolor_alpha(ccl_private OSLTextureOptions *opt,
+ int nchannels,
+ float alpha)
+{
+}
+
+ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg,
+ DeviceString filename,
+ ccl_private void *texture_handle,
+ OSLTextureOptions *opt,
+ float s,
+ float t,
+ float dsdx,
+ float dtdx,
+ float dsdy,
+ float dtdy,
+ int nchannels,
+ ccl_private float *result,
+ ccl_private float *dresultdx,
+ ccl_private float *dresultdy,
+ ccl_private float *alpha,
+ ccl_private float *dalphadx,
+ ccl_private float *dalphady,
+ ccl_private void *errormessage)
+{
+ if (!texture_handle) {
+ return false;
+ }
+
+ /* Only SVM textures are supported. */
+ int id = static_cast<int>(reinterpret_cast<size_t>(texture_handle) - 1);
+
+ const float4 rgba = kernel_tex_image_interp(nullptr, id, s, 1.0f - t);
+
+ result[0] = rgba.x;
+ if (nchannels > 1)
+ result[1] = rgba.y;
+ if (nchannels > 2)
+ result[2] = rgba.z;
+ if (nchannels > 3)
+ result[3] = rgba.w;
+
+ return true;
+}
+
+ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
+ DeviceString filename,
+ ccl_private void *texture_handle,
+ OSLTextureOptions *opt,
+ ccl_private const float3 *P,
+ ccl_private const float3 *dPdx,
+ ccl_private const float3 *dPdy,
+ ccl_private const float3 *dPdz,
+ int nchannels,
+ ccl_private float *result,
+ ccl_private float *dresultds,
+ ccl_private float *dresultdt,
+ ccl_private float *alpha,
+ ccl_private float *dalphadx,
+ ccl_private float *dalphady,
+ ccl_private void *errormessage)
+{
+ if (!texture_handle) {
+ return false;
+ }
+
+ /* Only SVM textures are supported. */
+ int id = static_cast<int>(reinterpret_cast<size_t>(texture_handle) - 1);
+
+ const float4 rgba = kernel_tex_image_interp_3d(nullptr, id, *P, INTERPOLATION_NONE);
+
+ result[0] = rgba.x;
+ if (nchannels > 1)
+ result[1] = rgba.y;
+ if (nchannels > 2)
+ result[2] = rgba.z;
+ if (nchannels > 3)
+ result[3] = rgba.w;
+
+ return true;
+}
+
+ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg,
+ DeviceString filename,
+ ccl_private void *texture_handle,
+ OSLTextureOptions *opt,
+ ccl_private const float3 *R,
+ ccl_private const float3 *dRdx,
+ ccl_private const float3 *dRdy,
+ int nchannels,
+ ccl_private float *result,
+ ccl_private float *dresultds,
+ ccl_private float *dresultdt,
+ ccl_private float *alpha,
+ ccl_private float *dalphax,
+ ccl_private float *dalphay,
+ ccl_private void *errormessage)
+{
+ result[0] = 1.0f;
+ if (nchannels > 1)
+ result[1] = 0.0f;
+ if (nchannels > 2)
+ result[2] = 1.0f;
+ if (nchannels > 3)
+ result[3] = 1.0f;
+
+ return false;
+}
+
+ccl_device_extern bool osl_get_textureinfo(ccl_private ShaderGlobals *sg,
+ DeviceString filename,
+ ccl_private void *texture_handle,
+ DeviceString dataname,
+ int basetype,
+ int arraylen,
+ int aggegrate,
+ ccl_private void *data,
+ ccl_private void *errormessage)
+{
+ return false;
+}
+
+ccl_device_extern bool osl_get_textureinfo_st(ccl_private ShaderGlobals *sg,
+ DeviceString filename,
+ ccl_private void *texture_handle,
+ float s,
+ float t,
+ DeviceString dataname,
+ int basetype,
+ int arraylen,
+ int aggegrate,
+ ccl_private void *data,
+ ccl_private void *errormessage)
+{
+ return osl_get_textureinfo(
+ sg, filename, texture_handle, dataname, basetype, arraylen, aggegrate, data, errormessage);
+}
+
+/* Standard library */
+
+#define OSL_OP_IMPL_II(name, op) \
+ ccl_device_extern int name##_ii(int a) \
+ { \
+ return op(a); \
+ }
+#define OSL_OP_IMPL_IF(name, op) \
+ ccl_device_extern int name##_if(float a) \
+ { \
+ return op(a); \
+ }
+#define OSL_OP_IMPL_FF(name, op) \
+ ccl_device_extern float name##_ff(float a) \
+ { \
+ return op(a); \
+ }
+#define OSL_OP_IMPL_DFDF(name, op) \
+ ccl_device_extern void name##_dfdf(ccl_private float *res, ccl_private const float *a) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDV(name, op) \
+ ccl_device_extern void name##_dfdv(ccl_private float *res, ccl_private const float3 *a) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_FV(name, op) \
+ ccl_device_extern float name##_fv(ccl_private const float3 *a) \
+ { \
+ return op(*a); \
+ }
+#define OSL_OP_IMPL_VV(name, op) \
+ ccl_device_extern void name##_vv(ccl_private float3 *res, ccl_private const float3 *a) \
+ { \
+ *res = op(*a); \
+ }
+#define OSL_OP_IMPL_VV_(name, op) \
+ ccl_device_extern void name##_vv(ccl_private float3 *res, ccl_private const float3 *a) \
+ { \
+ res->x = op(a->x); \
+ res->y = op(a->y); \
+ res->z = op(a->z); \
+ }
+#define OSL_OP_IMPL_DVDV(name, op) \
+ ccl_device_extern void name##_dvdv(ccl_private float3 *res, ccl_private const float3 *a) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDV_(name, op) \
+ ccl_device_extern void name##_dvdv(ccl_private float3 *res, ccl_private const float3 *a) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[i].x); \
+ res[i].y = op(a[i].y); \
+ res[i].z = op(a[i].z); \
+ } \
+ }
+
+#define OSL_OP_IMPL_III(name, op) \
+ ccl_device_extern int name##_iii(int a, int b) \
+ { \
+ return op(a, b); \
+ }
+#define OSL_OP_IMPL_FFF(name, op) \
+ ccl_device_extern float name##_fff(float a, float b) \
+ { \
+ return op(a, b); \
+ }
+#define OSL_OP_IMPL_FVV(name, op) \
+ ccl_device_extern float name##_fvv(ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ return op(*a, *b); \
+ }
+#define OSL_OP_IMPL_DFFDF(name, op) \
+ ccl_device_extern void name##_dffdf( \
+ ccl_private float *res, float a, ccl_private const float *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a, b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDFF(name, op) \
+ ccl_device_extern void name##_dfdff( \
+ ccl_private float *res, ccl_private const float *a, float b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDFDF(name, op) \
+ ccl_device_extern void name##_dfdfdf( \
+ ccl_private float *res, ccl_private const float *a, ccl_private const float *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFVDV(name, op) \
+ ccl_device_extern void name##_dfvdv( \
+ ccl_private float *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[0], b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDVV(name, op) \
+ ccl_device_extern void name##_dfdvv( \
+ ccl_private float *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[0]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDVDV(name, op) \
+ ccl_device_extern void name##_dfdvdv( \
+ ccl_private float *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_VVF_(name, op) \
+ ccl_device_extern void name##_vvf( \
+ ccl_private float3 *res, ccl_private const float3 *a, float b) \
+ { \
+ res->x = op(a->x, b); \
+ res->y = op(a->y, b); \
+ res->z = op(a->z, b); \
+ }
+#define OSL_OP_IMPL_VVV(name, op) \
+ ccl_device_extern void name##_vvv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ *res = op(*a, *b); \
+ }
+#define OSL_OP_IMPL_VVV_(name, op) \
+ ccl_device_extern void name##_vvv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ res->x = op(a->x, b->x); \
+ res->y = op(a->y, b->y); \
+ res->z = op(a->z, b->z); \
+ }
+#define OSL_OP_IMPL_DVVDF_(name, op) \
+ ccl_device_extern void name##_dvvdf( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[0].x, b[i]); \
+ res[i].y = op(a[0].y, b[i]); \
+ res[i].z = op(a[0].z, b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDVF_(name, op) \
+ ccl_device_extern void name##_dvdvf( \
+ ccl_private float3 *res, ccl_private const float3 *a, float b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[i].x, b); \
+ res[i].y = op(a[i].y, b); \
+ res[i].z = op(a[i].z, b); \
+ } \
+ }
+#define OSL_OP_IMPL_DVVDV(name, op) \
+ ccl_device_extern void name##_dvvdv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[0], b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DVVDV_(name, op) \
+ ccl_device_extern void name##_dvvdv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[0].x, b[i].x); \
+ res[i].y = op(a[0].y, b[i].y); \
+ res[i].z = op(a[0].z, b[i].z); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDVV(name, op) \
+ ccl_device_extern void name##_dvdvv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[0]); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDVV_(name, op) \
+ ccl_device_extern void name##_dvdvv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[i].x, b[0].x); \
+ res[i].y = op(a[i].y, b[0].y); \
+ res[i].z = op(a[i].z, b[0].z); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDVDF_(name, op) \
+ ccl_device_extern void name##_dvdvdf( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[i].x, b[i]); \
+ res[i].y = op(a[i].y, b[i]); \
+ res[i].z = op(a[i].z, b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDVDV(name, op) \
+ ccl_device_extern void name##_dvdvdv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DVDVDV_(name, op) \
+ ccl_device_extern void name##_dvdvdv( \
+ ccl_private float3 *res, ccl_private const float3 *a, ccl_private const float3 *b) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i].x = op(a[i].x, b[i].x); \
+ res[i].y = op(a[i].y, b[i].y); \
+ res[i].z = op(a[i].z, b[i].z); \
+ } \
+ }
+
+#define OSL_OP_IMPL_FFFF(name, op) \
+ ccl_device_extern float name##_ffff(float a, float b, float c) \
+ { \
+ return op(a, b, c); \
+ }
+#define OSL_OP_IMPL_DFFFDF(name, op) \
+ ccl_device_extern void name##_dfffdf( \
+ ccl_private float *res, float a, float b, ccl_private const float *c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a, b, c[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFFDFF(name, op) \
+ ccl_device_extern void name##_dffdff( \
+ ccl_private float *res, float a, ccl_private const float *b, float c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a, b[i], c); \
+ } \
+ }
+#define OSL_OP_IMPL_DFFDFDF(name, op) \
+ ccl_device_extern void name##_dffdfdf( \
+ ccl_private float *res, float a, ccl_private const float *b, ccl_private const float *c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a, b[i], c[i]); \
+ } \
+ }
+
+#define OSL_OP_IMPL_DFDFFF(name, op) \
+ ccl_device_extern void name##_dfdfff( \
+ ccl_private float *res, ccl_private const float *a, float b, float c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b, c); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDFFDF(name, op) \
+ ccl_device_extern void name##_dfdffdf( \
+ ccl_private float *res, ccl_private const float *a, float b, ccl_private const float *c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b, c[i]); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDFDFF(name, op) \
+ ccl_device_extern void name##_dfdfdff( \
+ ccl_private float *res, ccl_private const float *a, ccl_private const float *b, float c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[i], c); \
+ } \
+ }
+#define OSL_OP_IMPL_DFDFDFDF(name, op) \
+ ccl_device_extern void name##_dfdfdfdf(ccl_private float *res, \
+ ccl_private const float *a, \
+ ccl_private const float *b, \
+ ccl_private const float *c) \
+ { \
+ for (int i = 0; i < 3; ++i) { \
+ res[i] = op(a[i], b[i], c[i]); \
+ } \
+ }
+
+#define OSL_OP_IMPL_XX(name, op) \
+ OSL_OP_IMPL_FF(name, op) \
+ OSL_OP_IMPL_DFDF(name, op) \
+ OSL_OP_IMPL_VV_(name, op) \
+ OSL_OP_IMPL_DVDV_(name, op)
+
+#define OSL_OP_IMPL_XXX(name, op) \
+ OSL_OP_IMPL_FFF(name, op) \
+ OSL_OP_IMPL_DFFDF(name, op) \
+ OSL_OP_IMPL_DFDFF(name, op) \
+ OSL_OP_IMPL_DFDFDF(name, op) \
+ OSL_OP_IMPL_VVV_(name, op) \
+ OSL_OP_IMPL_DVVDV_(name, op) \
+ OSL_OP_IMPL_DVDVV_(name, op) \
+ OSL_OP_IMPL_DVDVDV_(name, op)
+
+OSL_OP_IMPL_XX(osl_acos, acosf)
+OSL_OP_IMPL_XX(osl_asin, asinf)
+OSL_OP_IMPL_XX(osl_atan, atanf)
+OSL_OP_IMPL_XXX(osl_atan2, atan2f)
+OSL_OP_IMPL_XX(osl_cos, cosf)
+OSL_OP_IMPL_XX(osl_sin, sinf)
+OSL_OP_IMPL_XX(osl_tan, tanf)
+OSL_OP_IMPL_XX(osl_cosh, coshf)
+OSL_OP_IMPL_XX(osl_sinh, sinhf)
+OSL_OP_IMPL_XX(osl_tanh, tanhf)
+
+ccl_device_forceinline int safe_divide(int a, int b)
+{
+ return (b != 0) ? a / b : 0;
+}
+ccl_device_forceinline int safe_modulo(int a, int b)
+{
+ return (b != 0) ? a % b : 0;
+}
+
+OSL_OP_IMPL_III(osl_safe_div, safe_divide)
+OSL_OP_IMPL_FFF(osl_safe_div, safe_divide)
+OSL_OP_IMPL_III(osl_safe_mod, safe_modulo)
+
+ccl_device_extern void osl_sincos_fff(float a, ccl_private float *b, ccl_private float *c)
+{
+ sincos(a, b, c);
+}
+ccl_device_extern void osl_sincos_dfdff(ccl_private const float *a,
+ ccl_private float *b,
+ ccl_private float *c)
+{
+ for (int i = 0; i < 3; ++i)
+ sincos(a[i], b + i, c);
+}
+ccl_device_extern void osl_sincos_dffdf(ccl_private const float *a,
+ ccl_private float *b,
+ ccl_private float *c)
+{
+ for (int i = 0; i < 3; ++i)
+ sincos(a[i], b, c + i);
+}
+ccl_device_extern void osl_sincos_dfdfdf(ccl_private const float *a,
+ ccl_private float *b,
+ ccl_private float *c)
+{
+ for (int i = 0; i < 3; ++i)
+ sincos(a[i], b + i, c + i);
+}
+ccl_device_extern void osl_sincos_vvv(ccl_private const float3 *a,
+ ccl_private float3 *b,
+ ccl_private float3 *c)
+{
+ sincos(a->x, &b->x, &c->x);
+ sincos(a->y, &b->y, &c->y);
+ sincos(a->z, &b->z, &c->z);
+}
+ccl_device_extern void osl_sincos_dvdvv(ccl_private const float3 *a,
+ ccl_private float3 *b,
+ ccl_private float3 *c)
+{
+ for (int i = 0; i < 3; ++i) {
+ sincos(a[i].x, &b[i].x, &c->x);
+ sincos(a[i].y, &b[i].y, &c->y);
+ sincos(a[i].z, &b[i].z, &c->z);
+ }
+}
+ccl_device_extern void osl_sincos_dvvdv(ccl_private const float3 *a,
+ ccl_private float3 *b,
+ ccl_private float3 *c)
+{
+ for (int i = 0; i < 3; ++i) {
+ sincos(a[i].x, &b->x, &c[i].x);
+ sincos(a[i].y, &b->y, &c[i].y);
+ sincos(a[i].z, &b->z, &c[i].z);
+ }
+}
+ccl_device_extern void osl_sincos_dvdvdv(ccl_private const float3 *a,
+ ccl_private float3 *b,
+ ccl_private float3 *c)
+{
+ for (int i = 0; i < 3; ++i) {
+ sincos(a[i].x, &b[i].x, &c[i].x);
+ sincos(a[i].y, &b[i].y, &c[i].y);
+ sincos(a[i].z, &b[i].z, &c[i].z);
+ }
+}
+
+OSL_OP_IMPL_XX(osl_log, logf)
+OSL_OP_IMPL_XX(osl_log2, log2f)
+OSL_OP_IMPL_XX(osl_log10, log10f)
+OSL_OP_IMPL_XX(osl_exp, expf)
+OSL_OP_IMPL_XX(osl_exp2, exp2f)
+OSL_OP_IMPL_XX(osl_expm1, expm1f)
+OSL_OP_IMPL_XX(osl_erf, erff)
+OSL_OP_IMPL_XX(osl_erfc, erfcf)
+
+OSL_OP_IMPL_XXX(osl_pow, safe_powf)
+OSL_OP_IMPL_VVF_(osl_pow, safe_powf)
+OSL_OP_IMPL_DVVDF_(osl_pow, safe_powf)
+OSL_OP_IMPL_DVDVF_(osl_pow, safe_powf)
+OSL_OP_IMPL_DVDVDF_(osl_pow, safe_powf)
+
+OSL_OP_IMPL_XX(osl_sqrt, sqrtf)
+OSL_OP_IMPL_XX(osl_inversesqrt, 1.0f / sqrtf)
+OSL_OP_IMPL_XX(osl_cbrt, cbrtf)
+
+OSL_OP_IMPL_FF(osl_logb, logbf)
+OSL_OP_IMPL_VV_(osl_logb, logbf)
+
+OSL_OP_IMPL_FF(osl_floor, floorf)
+OSL_OP_IMPL_VV_(osl_floor, floorf)
+OSL_OP_IMPL_FF(osl_ceil, ceilf)
+OSL_OP_IMPL_VV_(osl_ceil, ceilf)
+OSL_OP_IMPL_FF(osl_round, roundf)
+OSL_OP_IMPL_VV_(osl_round, roundf)
+OSL_OP_IMPL_FF(osl_trunc, truncf)
+OSL_OP_IMPL_VV_(osl_trunc, truncf)
+
+ccl_device_forceinline float step_impl(float edge, float x)
+{
+ return x < edge ? 0.0f : 1.0f;
+}
+
+OSL_OP_IMPL_FF(osl_sign, compatible_signf)
+OSL_OP_IMPL_VV_(osl_sign, compatible_signf)
+OSL_OP_IMPL_FFF(osl_step, step_impl)
+OSL_OP_IMPL_VVV_(osl_step, step_impl)
+
+OSL_OP_IMPL_IF(osl_isnan, isnan)
+OSL_OP_IMPL_IF(osl_isinf, isinf)
+OSL_OP_IMPL_IF(osl_isfinite, isfinite)
+
+OSL_OP_IMPL_II(osl_abs, abs)
+OSL_OP_IMPL_XX(osl_abs, fabsf)
+OSL_OP_IMPL_II(osl_fabs, abs)
+OSL_OP_IMPL_XX(osl_fabs, fabsf)
+OSL_OP_IMPL_XXX(osl_fmod, safe_modulo)
+
+OSL_OP_IMPL_FFFF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFFFDF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFFDFF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFFDFDF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFDFFF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFDFFDF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFDFDFF(osl_smoothstep, smoothstep)
+OSL_OP_IMPL_DFDFDFDF(osl_smoothstep, smoothstep)
+
+OSL_OP_IMPL_FVV(osl_dot, dot)
+OSL_OP_IMPL_DFDVV(osl_dot, dot)
+OSL_OP_IMPL_DFVDV(osl_dot, dot)
+OSL_OP_IMPL_DFDVDV(osl_dot, dot)
+OSL_OP_IMPL_VVV(osl_cross, cross)
+OSL_OP_IMPL_DVDVV(osl_cross, cross)
+OSL_OP_IMPL_DVVDV(osl_cross, cross)
+OSL_OP_IMPL_DVDVDV(osl_cross, cross)
+OSL_OP_IMPL_FV(osl_length, len)
+OSL_OP_IMPL_DFDV(osl_length, len)
+OSL_OP_IMPL_FVV(osl_distance, distance)
+OSL_OP_IMPL_DFDVV(osl_distance, distance)
+OSL_OP_IMPL_DFVDV(osl_distance, distance)
+OSL_OP_IMPL_DFDVDV(osl_distance, distance)
+OSL_OP_IMPL_VV(osl_normalize, safe_normalize)
+OSL_OP_IMPL_DVDV(osl_normalize, safe_normalize)
+
+ccl_device_extern void osl_calculatenormal(ccl_private float3 *res,
+ ccl_private ShaderGlobals *sg,
+ ccl_private const float3 *p)
+{
+ if (sg->flipHandedness)
+ *res = cross(p[2], p[1]);
+ else
+ *res = cross(p[1], p[2]);
+}
+
+ccl_device_extern float osl_area(ccl_private const float3 *p)
+{
+ return len(cross(p[2], p[1]));
+}
+
+ccl_device_extern float osl_filterwidth_fdf(ccl_private const float *x)
+{
+ return sqrtf(x[1] * x[1] + x[2] * x[2]);
+}
+
+ccl_device_extern void osl_filterwidth_vdv(ccl_private float *res, ccl_private const float *x)
+{
+ for (int i = 0; i < 3; ++i)
+ res[i] = osl_filterwidth_fdf(x + i);
+}
+
+ccl_device_extern bool osl_raytype_bit(ccl_private ShaderGlobals *sg, int bit)
+{
+ return (sg->raytype & bit) != 0;
+}
diff --git a/intern/cycles/kernel/osl/services_optix.cu b/intern/cycles/kernel/osl/services_optix.cu
new file mode 100644
index 00000000000..2a43a89a956
--- /dev/null
+++ b/intern/cycles/kernel/osl/services_optix.cu
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#define WITH_OSL
+
+// clang-format off
+#include "kernel/device/optix/compat.h"
+#include "kernel/device/optix/globals.h"
+
+#include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */
+
+#include "kernel/osl/services_gpu.h"
+// clang-format on
+
+extern "C" __device__ void __direct_callable__dummy_services()
+{
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl b/intern/cycles/kernel/osl/shaders/node_geometry.osl
index cc891abd6e3..5d9284deac2 100644
--- a/intern/cycles/kernel/osl/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl
@@ -3,8 +3,7 @@
#include "stdcycles.h"
-shader node_geometry(normal NormalIn = N,
- string bump_offset = "center",
+shader node_geometry(string bump_offset = "center",
output point Position = point(0.0, 0.0, 0.0),
output normal Normal = normal(0.0, 0.0, 0.0),
@@ -17,7 +16,7 @@ shader node_geometry(normal NormalIn = N,
output float RandomPerIsland = 0.0)
{
Position = P;
- Normal = NormalIn;
+ Normal = N;
TrueNormal = Ng;
Incoming = I;
Parametric = point(1.0 - u - v, u, 0.0);
diff --git a/intern/cycles/kernel/osl/shaders/node_normal_map.osl b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
index 3cda485c686..7e41bbf1720 100644
--- a/intern/cycles/kernel/osl/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
@@ -3,13 +3,12 @@
#include "stdcycles.h"
-shader node_normal_map(normal NormalIn = N,
- float Strength = 1.0,
+shader node_normal_map(float Strength = 1.0,
color Color = color(0.5, 0.5, 1.0),
string space = "tangent",
string attr_name = "geom:tangent",
string attr_sign_name = "geom:tangent_sign",
- output normal Normal = NormalIn)
+ output normal Normal = N)
{
color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
int is_backfacing = backfacing();
@@ -71,5 +70,5 @@ shader node_normal_map(normal NormalIn = N,
}
if (Strength != 1.0)
- Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+ Normal = normalize(N + (Normal - N) * max(Strength, 0.0));
}
diff --git a/intern/cycles/kernel/osl/shaders/node_tangent.osl b/intern/cycles/kernel/osl/shaders/node_tangent.osl
index a302c001f08..b3808778b2f 100644
--- a/intern/cycles/kernel/osl/shaders/node_tangent.osl
+++ b/intern/cycles/kernel/osl/shaders/node_tangent.osl
@@ -3,8 +3,7 @@
#include "stdcycles.h"
-shader node_tangent(normal NormalIn = N,
- string attr_name = "geom:tangent",
+shader node_tangent(string attr_name = "geom:tangent",
string direction_type = "radial",
string axis = "z",
output normal Tangent = normalize(dPdu))
@@ -29,5 +28,5 @@ shader node_tangent(normal NormalIn = N,
}
T = transform("object", "world", T);
- Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
+ Tangent = cross(N, normalize(cross(T, N)));
}
diff --git a/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
index 24875ce140a..cd2fdae3cb3 100644
--- a/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
+++ b/intern/cycles/kernel/osl/shaders/node_texture_coordinate.osl
@@ -4,7 +4,6 @@
#include "stdcycles.h"
shader node_texture_coordinate(
- normal NormalIn = N,
int is_background = 0,
int is_volume = 0,
int from_dupli = 0,
@@ -27,7 +26,7 @@ shader node_texture_coordinate(
point Pcam = transform("camera", "world", point(0, 0, 0));
Camera = transform("camera", P + Pcam);
getattribute("NDC", Window);
- Normal = NormalIn;
+ Normal = N;
Reflection = I;
}
else {
@@ -59,8 +58,8 @@ shader node_texture_coordinate(
}
Camera = transform("camera", P);
Window = transform("NDC", P);
- Normal = transform("world", "object", NormalIn);
- Reflection = -reflect(I, NormalIn);
+ Normal = transform("world", "object", N);
+ Reflection = -reflect(I, N);
}
if (bump_offset == "dx") {
diff --git a/intern/cycles/kernel/osl/types.h b/intern/cycles/kernel/osl/types.h
index 46e06114360..717306a3d07 100644
--- a/intern/cycles/kernel/osl/types.h
+++ b/intern/cycles/kernel/osl/types.h
@@ -5,9 +5,53 @@
CCL_NAMESPACE_BEGIN
+struct DeviceString {
+#if defined(__KERNEL_GPU__)
+ /* Strings are represented by their hashes in CUDA and OptiX. */
+ size_t str_;
+
+ ccl_device_inline_method uint64_t hash() const
+ {
+ return str_;
+ }
+#elif defined(OPENIMAGEIO_USTRING_H)
+ ustring str_;
+
+ ccl_device_inline_method uint64_t hash() const
+ {
+ return str_.hash();
+ }
+#else
+ const char *str_;
+#endif
+
+ ccl_device_inline_method bool operator==(DeviceString b) const
+ {
+ return str_ == b.str_;
+ }
+ ccl_device_inline_method bool operator!=(DeviceString b) const
+ {
+ return str_ != b.str_;
+ }
+};
+
+ccl_device_inline DeviceString make_string(const char *str, size_t hash)
+{
+#if defined(__KERNEL_GPU__)
+ (void)str;
+ return {hash};
+#elif defined(OPENIMAGEIO_USTRING_H)
+ (void)hash;
+ return {ustring(str)};
+#else
+ (void)hash;
+ return {str};
+#endif
+}
+
/* Closure */
-enum ClosureTypeOSL {
+enum OSLClosureType {
OSL_CLOSURE_MUL_ID = -1,
OSL_CLOSURE_ADD_ID = -2,
@@ -17,4 +61,60 @@ enum ClosureTypeOSL {
#include "closures_template.h"
};
+struct OSLClosure {
+ OSLClosureType id;
+};
+
+struct ccl_align(8) OSLClosureMul : public OSLClosure
+{
+ packed_float3 weight;
+ ccl_private const OSLClosure *closure;
+};
+
+struct ccl_align(8) OSLClosureAdd : public OSLClosure
+{
+ ccl_private const OSLClosure *closureA;
+ ccl_private const OSLClosure *closureB;
+};
+
+struct ccl_align(8) OSLClosureComponent : public OSLClosure
+{
+ packed_float3 weight;
+};
+
+/* Globals */
+
+struct ShaderGlobals {
+ packed_float3 P, dPdx, dPdy;
+ packed_float3 dPdz;
+ packed_float3 I, dIdx, dIdy;
+ packed_float3 N;
+ packed_float3 Ng;
+ float u, dudx, dudy;
+ float v, dvdx, dvdy;
+ packed_float3 dPdu, dPdv;
+ float time;
+ float dtime;
+ packed_float3 dPdtime;
+ packed_float3 Ps, dPsdx, dPsdy;
+ ccl_private void *renderstate;
+ ccl_private void *tracedata;
+ ccl_private void *objdata;
+ void *context;
+ void *renderer;
+ ccl_private void *object2common;
+ ccl_private void *shader2common;
+ ccl_private OSLClosure *Ci;
+ float surfacearea;
+ int raytype;
+ int flipHandedness;
+ int backfacing;
+};
+
+struct OSLNoiseOptions {
+};
+
+struct OSLTextureOptions {
+};
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/noise.h b/intern/cycles/kernel/svm/noise.h
index 31e77d87413..209195a03f1 100644
--- a/intern/cycles/kernel/svm/noise.h
+++ b/intern/cycles/kernel/svm/noise.h
@@ -39,11 +39,11 @@ ccl_device_noinline_cpu float perlin_1d(float x)
}
/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
- * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not
+ * SSE is supported, that is, if __KERNEL_SSE__ is defined. If it is not
* supported, we do a standard implementation, but if it is supported, we
* do an implementation using SSE intrinsics.
*/
-#if !defined(__KERNEL_SSE2__)
+#if !defined(__KERNEL_SSE__)
/* ** Standard Implementation ** */
@@ -250,18 +250,18 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
/* SSE Bilinear Interpolation:
*
- * The function takes two ssef inputs:
+ * The function takes two float4 inputs:
* - p : Contains the values at the points (v0, v1, v2, v3).
* - f : Contains the values (x, y, _, _). The third and fourth values are unused.
*
* The interpolation is done in two steps:
* 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
* (v2, v3) is generated by moving v2 and v3 to the first and second
- * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
* fourth values are unused.
* 2. Interpolate g0 and g1 along the y axis to get the final value.
- * g1 is generated by populating an ssef with the second value of g.
- * Only the first value is important in the final ssef.
+ * g1 is generated by populating an float4 with the second value of g.
+ * Only the first value is important in the final float4.
*
* v1 v3 g1
* @ + + + + @ @ y
@@ -272,27 +272,27 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
* v0 v2 g0
*
*/
-ccl_device_inline ssef bi_mix(ssef p, ssef f)
+ccl_device_inline float4 bi_mix(float4 p, float4 f)
{
- ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
+ float4 g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
return mix(g, shuffle<1>(g), shuffle<1>(f));
}
-ccl_device_inline ssef fade(const ssef &t)
+ccl_device_inline float4 fade(const float4 t)
{
- ssef a = madd(t, 6.0f, -15.0f);
- ssef b = madd(t, a, 10.0f);
+ float4 a = madd(t, make_float4(6.0f), make_float4(-15.0f));
+ float4 b = madd(t, a, make_float4(10.0f));
return (t * t) * (t * b);
}
/* Negate val if the nth bit of h is 1. */
# define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
-ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
+ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y)
{
- ssei h = hash & 7;
- ssef u = select(h < 4, x, y);
- ssef v = 2.0f * select(h < 4, y, x);
+ int4 h = hash & 7;
+ float4 u = select(h < 4, x, y);
+ float4 v = 2.0f * select(h < 4, y, x);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
@@ -310,28 +310,28 @@ ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y)
*/
ccl_device_noinline_cpu float perlin_2d(float x, float y)
{
- ssei XY;
- ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY);
- ssef uv = fade(fxy);
+ int4 XY;
+ float4 fxy = floorfrac(make_float4(x, y, 0.0f, 0.0f), &XY);
+ float4 uv = fade(fxy);
- ssei XY1 = XY + 1;
- ssei X = shuffle<0, 0, 0, 0>(XY, XY1);
- ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
+ int4 XY1 = XY + make_int4(1);
+ int4 X = shuffle<0, 0, 0, 0>(XY, XY1);
+ int4 Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
- ssei h = hash_ssei2(X, Y);
+ int4 h = hash_int4_2(X, Y);
- ssef fxy1 = fxy - 1.0f;
- ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
- ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
+ float4 fxy1 = fxy - make_float4(1.0f);
+ float4 fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
+ float4 fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
- ssef g = grad(h, fx, fy);
+ float4 g = grad(h, fx, fy);
return extract<0>(bi_mix(g, uv));
}
/* SSE Trilinear Interpolation:
*
- * The function takes three ssef inputs:
+ * The function takes three float4 inputs:
* - p : Contains the values at the points (v0, v1, v2, v3).
* - q : Contains the values at the points (v4, v5, v6, v7).
* - f : Contains the values (x, y, z, _). The fourth value is unused.
@@ -340,11 +340,11 @@ ccl_device_noinline_cpu float perlin_2d(float x, float y)
* 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
* 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
* (s2, s3) is generated by moving v2 and v3 to the first and second
- * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and
+ * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
* fourth values are unused.
* 3. Interpolate g0 and g1 along the z axis to get the final value.
- * g1 is generated by populating an ssef with the second value of g.
- * Only the first value is important in the final ssef.
+ * g1 is generated by populating an float4 with the second value of g.
+ * Only the first value is important in the final float4.
*
* v3 v7
* @ + + + + + + @ s3 @
@@ -362,10 +362,10 @@ ccl_device_noinline_cpu float perlin_2d(float x, float y)
* @ + + + + + + @ @
* v0 v4 s0
*/
-ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
+ccl_device_inline float4 tri_mix(float4 p, float4 q, float4 f)
{
- ssef s = mix(p, q, shuffle<0>(f));
- ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
+ float4 s = mix(p, q, shuffle<0>(f));
+ float4 g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
return mix(g, shuffle<1>(g), shuffle<2>(f));
}
@@ -374,24 +374,24 @@ ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f)
* supported, we do an SSE implementation, but if it is supported,
* we do an implementation using AVX intrinsics.
*/
-# if !defined(__KERNEL_AVX__)
+# if !defined(__KERNEL_AVX2__)
-ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
+ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y, const float4 z)
{
- ssei h = hash & 15;
- ssef u = select(h < 8, x, y);
- ssef vt = select((h == 12) | (h == 14), x, z);
- ssef v = select(h < 4, y, vt);
+ int4 h = hash & 15;
+ float4 u = select(h < 8, x, y);
+ float4 vt = select((h == 12) | (h == 14), x, z);
+ float4 v = select(h < 4, y, vt);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
-ccl_device_inline ssef
-grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w)
+ccl_device_inline float4
+grad(const int4 hash, const float4 x, const float4 y, const float4 z, const float4 w)
{
- ssei h = hash & 31;
- ssef u = select(h < 24, x, y);
- ssef v = select(h < 16, y, z);
- ssef s = select(h < 8, z, w);
+ int4 h = hash & 31;
+ float4 u = select(h < 24, x, y);
+ float4 v = select(h < 16, y, z);
+ float4 s = select(h < 8, z, w);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
}
@@ -401,7 +401,7 @@ grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &
* between two trilinear interpolations.
*
*/
-ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
+ccl_device_inline float4 quad_mix(float4 p, float4 q, float4 r, float4 s, float4 f)
{
return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
}
@@ -427,23 +427,23 @@ ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f)
*/
ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
{
- ssei XYZ;
- ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
- ssef uvw = fade(fxyz);
+ int4 XYZ;
+ float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
+ float4 uvw = fade(fxyz);
- ssei XYZ1 = XYZ + 1;
- ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
+ int4 XYZ1 = XYZ + make_int4(1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
- ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z);
- ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z);
+ int4 h1 = hash_int4_3(shuffle<0>(XYZ), Y, Z);
+ int4 h2 = hash_int4_3(shuffle<0>(XYZ1), Y, Z);
- ssef fxyz1 = fxyz - 1.0f;
- ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
+ float4 fxyz1 = fxyz - make_float4(1.0f);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
- ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
- ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
+ float4 g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
+ float4 g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
return extract<0>(tri_mix(g1, g2, uvw));
}
@@ -481,29 +481,29 @@ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
*/
ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
- ssei XYZW;
- ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
- ssef uvws = fade(fxyzw);
+ int4 XYZW;
+ float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
+ float4 uvws = fade(fxyzw);
- ssei XYZW1 = XYZW + 1;
- ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
+ int4 XYZW1 = XYZW + make_int4(1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
- ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
- ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
+ int4 h1 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
+ int4 h2 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
- ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
- ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
+ int4 h3 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
+ int4 h4 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
- ssef fxyzw1 = fxyzw - 1.0f;
- ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
+ float4 fxyzw1 = fxyzw - make_float4(1.0f);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
- ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
- ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
+ float4 g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
+ float4 g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
- ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
- ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
+ float4 g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
+ float4 g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
}
@@ -512,22 +512,22 @@ ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
/* AVX Implementation */
-ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z)
+ccl_device_inline vfloat8 grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z)
{
- avxi h = hash & 15;
- avxf u = select(h < 8, x, y);
- avxf vt = select((h == 12) | (h == 14), x, z);
- avxf v = select(h < 4, y, vt);
+ vint8 h = hash & 15;
+ vfloat8 u = select(h < 8, x, y);
+ vfloat8 vt = select((h == 12) | (h == 14), x, z);
+ vfloat8 v = select(h < 4, y, vt);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
}
-ccl_device_inline avxf
-grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w)
+ccl_device_inline vfloat8
+grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z, const vfloat8 w)
{
- avxi h = hash & 31;
- avxf u = select(h < 24, x, y);
- avxf v = select(h < 16, y, z);
- avxf s = select(h < 8, z, w);
+ vint8 h = hash & 31;
+ vfloat8 u = select(h < 24, x, y);
+ vfloat8 v = select(h < 16, y, z);
+ vfloat8 s = select(h < 8, z, w);
return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
}
@@ -537,13 +537,13 @@ grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &
* 1. Interpolate p and q along the w axis to get s.
* 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final
* value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the
- * low and high ssef from s.
+ * low and high float4 from s.
*
*/
-ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
+ccl_device_inline float4 quad_mix(vfloat8 p, vfloat8 q, float4 f)
{
- ssef fv = shuffle<3>(f);
- avxf s = mix(p, q, avxf(fv, fv));
+ float4 fv = shuffle<3>(f);
+ vfloat8 s = mix(p, q, make_vfloat8(fv, fv));
return tri_mix(low(s), high(s), f);
}
@@ -565,25 +565,25 @@ ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
*/
ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
{
- ssei XYZ;
- ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ);
- ssef uvw = fade(fxyz);
+ int4 XYZ;
+ float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
+ float4 uvw = fade(fxyz);
- ssei XYZ1 = XYZ + 1;
- ssei X = shuffle<0>(XYZ);
- ssei X1 = shuffle<0>(XYZ1);
- ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
+ int4 XYZ1 = XYZ + make_int4(1);
+ int4 X = shuffle<0>(XYZ);
+ int4 X1 = shuffle<0>(XYZ1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
- avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z));
+ vint8 h = hash_int8_3(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z));
- ssef fxyz1 = fxyz - 1.0f;
- ssef fx = shuffle<0>(fxyz);
- ssef fx1 = shuffle<0>(fxyz1);
- ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
+ float4 fxyz1 = fxyz - make_float4(1.0f);
+ float4 fx = shuffle<0>(fxyz);
+ float4 fx1 = shuffle<0>(fxyz1);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
- avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz));
+ vfloat8 g = grad(h, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz));
return extract<0>(tri_mix(low(g), high(g), uvw));
}
@@ -617,31 +617,37 @@ ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
*/
ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
{
- ssei XYZW;
- ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW);
- ssef uvws = fade(fxyzw);
-
- ssei XYZW1 = XYZW + 1;
- ssei X = shuffle<0>(XYZW);
- ssei X1 = shuffle<0>(XYZW1);
- ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
- ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
- ssei W = shuffle<3>(XYZW);
- ssei W1 = shuffle<3>(XYZW1);
-
- avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W));
- avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1));
-
- ssef fxyzw1 = fxyzw - 1.0f;
- ssef fx = shuffle<0>(fxyzw);
- ssef fx1 = shuffle<0>(fxyzw1);
- ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
- ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
- ssef fw = shuffle<3>(fxyzw);
- ssef fw1 = shuffle<3>(fxyzw1);
-
- avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw));
- avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1));
+ int4 XYZW;
+ float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
+ float4 uvws = fade(fxyzw);
+
+ int4 XYZW1 = XYZW + make_int4(1);
+ int4 X = shuffle<0>(XYZW);
+ int4 X1 = shuffle<0>(XYZW1);
+ int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
+ int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
+ int4 W = shuffle<3>(XYZW);
+ int4 W1 = shuffle<3>(XYZW1);
+
+ vint8 h1 = hash_int8_4(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W, W));
+ vint8 h2 = hash_int8_4(
+ make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W1, W1));
+
+ float4 fxyzw1 = fxyzw - make_float4(1.0f);
+ float4 fx = shuffle<0>(fxyzw);
+ float4 fx1 = shuffle<0>(fxyzw1);
+ float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
+ float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
+ float4 fw = shuffle<3>(fxyzw);
+ float4 fw1 = shuffle<3>(fxyzw1);
+
+ vfloat8 g1 = grad(
+ h1, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz), make_vfloat8(fw, fw));
+ vfloat8 g2 = grad(h2,
+ make_vfloat8(fx, fx1),
+ make_vfloat8(fy, fy),
+ make_vfloat8(fz, fz),
+ make_vfloat8(fw1, fw1));
return extract<0>(quad_mix(g1, g2, uvws));
}
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 24c5a6a4540..a6f8914a9b8 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -75,10 +75,14 @@ CCL_NAMESPACE_BEGIN
#define __VOLUME__
/* Device specific features */
-#ifndef __KERNEL_GPU__
-# ifdef WITH_OSL
-# define __OSL__
+#ifdef WITH_OSL
+# define __OSL__
+# ifdef __KERNEL_OPTIX__
+/* Kernels with OSL support are built separately in OptiX and don't need SVM. */
+# undef __SVM__
# endif
+#endif
+#ifndef __KERNEL_GPU__
# ifdef WITH_PATH_GUIDING
# define __PATH_GUIDING__
# endif
@@ -917,9 +921,13 @@ typedef struct ccl_align(16) ShaderData
float ray_dP;
#ifdef __OSL__
+# ifdef __KERNEL_GPU__
+ ccl_private uint8_t *osl_closure_pool;
+# else
const struct KernelGlobalsCPU *osl_globals;
const struct IntegratorStateCPU *osl_path_state;
const struct IntegratorShadowStateCPU *osl_shadow_path_state;
+# endif
#endif
/* LCG state for closures that require additional random numbers. */
@@ -1529,6 +1537,9 @@ enum KernelFeatureFlag : uint32_t {
/* Path guiding. */
KERNEL_FEATURE_PATH_GUIDING = (1U << 26U),
+
+ /* OSL. */
+ KERNEL_FEATURE_OSL = (1U << 27U),
};
/* Shader node feature mask, to specialize shader evaluation for kernels. */