Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r--intern/cycles/kernel/device/cpu/compat.h14
-rw-r--r--intern/cycles/kernel/device/cpu/globals.h26
-rw-r--r--intern/cycles/kernel/device/cpu/image.h14
-rw-r--r--intern/cycles/kernel/device/cpu/kernel.cpp8
-rw-r--r--intern/cycles/kernel/device/cuda/globals.h26
-rw-r--r--intern/cycles/kernel/device/gpu/image.h17
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h29
-rw-r--r--intern/cycles/kernel/device/hip/globals.h26
-rw-r--r--intern/cycles/kernel/device/metal/compat.h24
-rw-r--r--intern/cycles/kernel/device/metal/context_end.h2
-rw-r--r--intern/cycles/kernel/device/metal/globals.h18
-rw-r--r--intern/cycles/kernel/device/metal/kernel.metal54
-rw-r--r--intern/cycles/kernel/device/optix/globals.h16
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu48
-rw-r--r--intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu12
15 files changed, 201 insertions, 133 deletions
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index e1c20169582..3bfc37e98ee 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -35,20 +35,6 @@ CCL_NAMESPACE_BEGIN
#define kernel_assert(cond) assert(cond)
-/* Texture types to be compatible with CUDA textures. These are really just
- * simple arrays and after inlining fetch hopefully revert to being a simple
- * pointer lookup. */
-template<typename T> struct texture {
- ccl_always_inline const T &fetch(int index) const
- {
- kernel_assert(index >= 0 && index < width);
- return data[index];
- }
-
- T *data;
- int width;
-};
-
/* Macros to handle different memory storage on different devices */
#ifdef __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h
index 7e080d428ea..309afae412e 100644
--- a/intern/cycles/kernel/device/cpu/globals.h
+++ b/intern/cycles/kernel/device/cpu/globals.h
@@ -12,7 +12,7 @@
CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
- * the kernel, to access constant data. These are all stored as "textures", but
+ * the kernel, to access constant data. These are all stored as flat arrays.
* these are really just standard arrays. We can't use actually globals because
* multiple renders may be running inside the same process. */
@@ -22,11 +22,23 @@ struct OSLThreadData;
struct OSLShadingSystem;
#endif
+/* Array for kernel data, with size to be able to assert on invalid data access. */
+template<typename T> struct kernel_array {
+ ccl_always_inline const T &fetch(int index) const
+ {
+ kernel_assert(index >= 0 && index < width);
+ return data[index];
+ }
+
+ T *data;
+ int width;
+};
+
typedef struct KernelGlobalsCPU {
-#define KERNEL_TEX(type, name) texture<type> name;
-#include "kernel/textures.h"
+#define KERNEL_DATA_ARRAY(type, name) kernel_array<type> name;
+#include "kernel/data_arrays.h"
- KernelData __data;
+ KernelData data;
#ifdef __OSL__
/* On the CPU, we also have the OSL globals here. Most data structures are shared
@@ -44,8 +56,8 @@ typedef struct KernelGlobalsCPU {
typedef const KernelGlobalsCPU *ccl_restrict KernelGlobals;
/* Abstraction macros */
-#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
-#define kernel_tex_array(tex) (kg->tex.data)
-#define kernel_data (kg->__data)
+#define kernel_data_fetch(name, index) (kg->name.fetch(index))
+#define kernel_data_array(name) (kg->name.data)
+#define kernel_data (kg->data)
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index 3b714a3e580..320e6309128 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -733,7 +733,7 @@ template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ const TextureInfo &info = kernel_data_fetch(texture_info, id);
if (UNLIKELY(!info.data)) {
return zero_float4();
@@ -776,7 +776,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
float3 P,
InterpolationType interp)
{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ const TextureInfo &info = kernel_data_fetch(texture_info, id);
if (UNLIKELY(!info.data)) {
return zero_float4();
@@ -817,6 +817,16 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
}
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
+ case IMAGE_DATA_TYPE_NANOVDB_FPN: {
+ const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
+ info, P.x, P.y, P.z, interp);
+ return make_float4(f, f, f, 1.0f);
+ }
+ case IMAGE_DATA_TYPE_NANOVDB_FP16: {
+ const float f = NanoVDBInterpolator<nanovdb::Fp16, float>::interp_3d(
+ info, P.x, P.y, P.z, interp);
+ return make_float4(f, f, f, 1.0f);
+ }
#endif
default:
assert(0);
diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp
index b12e3089378..01087c96dd6 100644
--- a/intern/cycles/kernel/device/cpu/kernel.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel.cpp
@@ -53,8 +53,8 @@ CCL_NAMESPACE_BEGIN
void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t)
{
- if (strcmp(name, "__data") == 0) {
- kg->__data = *(KernelData *)host;
+ if (strcmp(name, "data") == 0) {
+ kg->data = *(KernelData *)host;
}
else {
assert(0);
@@ -66,13 +66,13 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem
if (0) {
}
-#define KERNEL_TEX(type, tname) \
+#define KERNEL_DATA_ARRAY(type, tname) \
else if (strcmp(name, #tname) == 0) \
{ \
kg->tname.data = (type *)mem; \
kg->tname.width = size; \
}
-#include "kernel/textures.h"
+#include "kernel/data_arrays.h"
else {
assert(0);
}
diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h
index e77fcd2b424..f5f7bcf58ee 100644
--- a/intern/cycles/kernel/device/cuda/globals.h
+++ b/intern/cycles/kernel/device/cuda/globals.h
@@ -20,18 +20,24 @@ struct KernelGlobalsGPU {
};
typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
-/* Global scene data and textures */
-__constant__ KernelData __data;
-#define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
-#include "kernel/textures.h"
+struct KernelParamsCUDA {
+ /* Global scene data and textures */
+ KernelData data;
+#define KERNEL_DATA_ARRAY(type, name) const type *name;
+#include "kernel/data_arrays.h"
+
+ /* Integrator state */
+ IntegratorStateGPU integrator_state;
+};
-/* Integrator state */
-__constant__ IntegratorStateGPU __integrator_state;
+#ifdef __KERNEL_GPU__
+__constant__ KernelParamsCUDA kernel_params;
+#endif
/* Abstraction macros */
-#define kernel_data __data
-#define kernel_tex_fetch(t, index) t[(index)]
-#define kernel_tex_array(t) (t)
-#define kernel_integrator_state __integrator_state
+#define kernel_data kernel_params.data
+#define kernel_data_fetch(name, index) kernel_params.name[(index)]
+#define kernel_data_array(name) (kernel_params.name)
+#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h
index c5bc7d88e02..a8c72645569 100644
--- a/intern/cycles/kernel/device/gpu/image.h
+++ b/intern/cycles/kernel/device/gpu/image.h
@@ -125,7 +125,8 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
#ifdef WITH_NANOVDB
template<typename T, typename S>
-ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
+ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
+ S &s, float x, float y, float z)
{
float px = floorf(x);
float py = floorf(y);
@@ -157,7 +158,7 @@ ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, fl
}
template<typename T>
-ccl_device_noinline T kernel_tex_image_interp_nanovdb(
+ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
{
using namespace nanovdb;
@@ -180,7 +181,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb(
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
{
- ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id);
/* float4, byte4, ushort4 and half4 */
const int texture_type = info.data_type;
@@ -215,7 +216,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
float3 P,
InterpolationType interp)
{
- ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id);
if (info.use_transform_3d) {
P = transform_point(&info.transform_3d, P);
@@ -238,6 +239,14 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
info, x, y, z, interpolation);
return make_float4(f[0], f[1], f[2], 1.0f);
}
+ if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
+ float f = kernel_tex_image_interp_nanovdb<nanovdb::FpN>(info, x, y, z, interpolation);
+ return make_float4(f, f, f, 1.0f);
+ }
+ if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
+ float f = kernel_tex_image_interp_nanovdb<nanovdb::Fp16>(info, x, y, z, interpolation);
+ return make_float4(f, f, f, 1.0f);
+ }
#endif
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 82b51843864..d657571a5fa 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -241,7 +241,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
-#ifdef __KERNEL_METAL__
+#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
constant int __dummy_constant [[function_constant(0)]];
#endif
@@ -256,7 +256,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (global_index < work_size) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
-#ifdef __KERNEL_METAL__
+#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
KernelGlobals kg = NULL;
/* Workaround Ambient Occlusion and Bevel nodes not working with Metal.
* Dummy offset should not affect result, but somehow fixes bug! */
@@ -270,6 +270,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
+ ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
+ ccl_global const int *path_index_array,
+ ccl_global float *render_buffer,
+ const int work_size)
+{
+ const int global_index = ccl_gpu_global_id_x();
+
+ if (global_index < work_size) {
+ const int state = (path_index_array) ? path_index_array[global_index] : global_index;
+ ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer));
+ }
+}
+ccl_gpu_kernel_postfix
+
+ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
@@ -647,8 +662,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
const int x = render_pixel_index % width; \
const int y = render_pixel_index / width; \
\
- ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
- y * stride * kfilm_convert.pass_stride; \
+ const uint64_t buffer_pixel_index = x + y * stride; \
+ ccl_global const float *buffer = render_buffer + offset + \
+ buffer_pixel_index * kfilm_convert.pass_stride; \
\
ccl_global float *pixel = pixels + \
(render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
@@ -677,8 +693,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
const int x = render_pixel_index % width; \
const int y = render_pixel_index / width; \
\
- ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
- y * stride * kfilm_convert.pass_stride; \
+ const uint64_t buffer_pixel_index = x + y * stride; \
+ ccl_global const float *buffer = render_buffer + offset + \
+ buffer_pixel_index * kfilm_convert.pass_stride; \
\
float pixel[4]; \
film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h
index 50f117038a2..3a334b21a9e 100644
--- a/intern/cycles/kernel/device/hip/globals.h
+++ b/intern/cycles/kernel/device/hip/globals.h
@@ -20,18 +20,24 @@ struct KernelGlobalsGPU {
};
typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
-/* Global scene data and textures */
-__constant__ KernelData __data;
-#define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name;
-#include "kernel/textures.h"
+struct KernelParamsHIP {
+ /* Global scene data and textures */
+ KernelData data;
+#define KERNEL_DATA_ARRAY(type, name) const type *name;
+#include "kernel/data_arrays.h"
+
+ /* Integrator state */
+ IntegratorStateGPU integrator_state;
+};
-/* Integrator state */
-__constant__ IntegratorStateGPU __integrator_state;
+#ifdef __KERNEL_GPU__
+__constant__ KernelParamsHIP kernel_params;
+#endif
/* Abstraction macros */
-#define kernel_data __data
-#define kernel_tex_fetch(t, index) t[(index)]
-#define kernel_tex_array(t) (t)
-#define kernel_integrator_state __integrator_state
+#define kernel_data kernel_params.data
+#define kernel_data_fetch(name, index) kernel_params.name[(index)]
+#define kernel_data_array(name) (kernel_params.name)
+#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 4e309f16c08..0ed52074a90 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,10 +29,26 @@ using namespace metal::raytracing;
/* Qualifiers */
-#define ccl_device
-#define ccl_device_inline ccl_device
-#define ccl_device_forceinline ccl_device
-#define ccl_device_noinline ccl_device __attribute__((noinline))
+#if defined(__KERNEL_METAL_APPLE__)
+
+/* Inline everything for Apple GPUs.
+ * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
+ * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+
+# define ccl_device __attribute__((always_inline))
+# define ccl_device_inline __attribute__((always_inline))
+# define ccl_device_forceinline __attribute__((always_inline))
+# define ccl_device_noinline __attribute__((always_inline))
+
+#else
+
+# define ccl_device
+# define ccl_device_inline ccl_device
+# define ccl_device_forceinline ccl_device
+# define ccl_device_noinline ccl_device __attribute__((noinline))
+
+#endif
+
#define ccl_device_noinline_cpu ccl_device
#define ccl_device_inline_method ccl_device
#define ccl_global device
diff --git a/intern/cycles/kernel/device/metal/context_end.h b/intern/cycles/kernel/device/metal/context_end.h
index b4c8661c401..44ac0478266 100644
--- a/intern/cycles/kernel/device/metal/context_end.h
+++ b/intern/cycles/kernel/device/metal/context_end.h
@@ -7,4 +7,4 @@
/* NOTE: These macros will need maintaining as entry-points change. */
#undef kernel_integrator_state
-#define kernel_integrator_state context.launch_params_metal.__integrator_state
+#define kernel_integrator_state context.launch_params_metal.integrator_state
diff --git a/intern/cycles/kernel/device/metal/globals.h b/intern/cycles/kernel/device/metal/globals.h
index 1c3e775dbae..a336c096440 100644
--- a/intern/cycles/kernel/device/metal/globals.h
+++ b/intern/cycles/kernel/device/metal/globals.h
@@ -12,11 +12,11 @@ CCL_NAMESPACE_BEGIN
typedef struct KernelParamsMetal {
-#define KERNEL_TEX(type, name) ccl_global const type *name;
-#include "kernel/textures.h"
-#undef KERNEL_TEX
+#define KERNEL_DATA_ARRAY(type, name) ccl_global const type *name;
+#include "kernel/data_arrays.h"
+#undef KERNEL_DATA_ARRAY
- const IntegratorStateGPU __integrator_state;
+ const IntegratorStateGPU integrator_state;
const KernelData data;
} KernelParamsMetal;
@@ -27,12 +27,10 @@ typedef struct KernelGlobalsGPU {
typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
+/* Abstraction macros */
#define kernel_data launch_params_metal.data
-#define kernel_integrator_state launch_params_metal.__integrator_state
-
-/* data lookup defines */
-
-#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index]
-#define kernel_tex_array(tex) launch_params_metal.tex
+#define kernel_data_fetch(name, index) launch_params_metal.name[index]
+#define kernel_data_array(name) launch_params_metal.name
+#define kernel_integrator_state launch_params_metal.integrator_state
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index a7252570e64..3c31dc3354c 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -59,7 +59,7 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
TReturn result;
#ifdef __BVH_LOCAL__
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) {
/* Only intersect with matching object and skip self-intersecton. */
@@ -113,16 +113,16 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
isect->t = ray_tmax;
isect->prim = prim;
isect->object = object;
- isect->type = kernel_tex_fetch(__objects, object).primitive_type;
+ isect->type = kernel_data_fetch(objects, object).primitive_type;
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
/* Record geometric normal */
- const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w;
- const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
- const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
- const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
+ const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0));
+ const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1));
+ const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2));
payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
/* Continue tracing (without this the trace call would return after the first hit) */
@@ -168,7 +168,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
#ifdef __SHADOW_RECORD_ALL__
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
/* continue search */
return true;
}
@@ -184,14 +184,14 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
if (intersection_type == METALRT_HIT_TRIANGLE) {
u = 1.0f - barycentrics.y - barycentrics.x;
v = barycentrics.x;
- type = kernel_tex_fetch(__objects, object).primitive_type;
+ type = kernel_data_fetch(objects, object).primitive_type;
}
# ifdef __HAIR__
else {
u = barycentrics.x;
v = barycentrics.y;
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
@@ -294,7 +294,7 @@ __anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_p
float2 barycentrics [[barycentric_coord]],
float ray_tmax [[distance]])
{
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
TriangleIntersectionResult result;
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
@@ -337,7 +337,7 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
uint visibility = payload.visibility;
# ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
result.accept = false;
result.continue_search = true;
return result;
@@ -377,12 +377,12 @@ __anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_
unsigned int object [[user_instance_id]],
unsigned int primitive_id [[primitive_id]])
{
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
launch_params_metal, payload, object, prim, 0.0f);
if (result.accept) {
payload.prim = prim;
- payload.type = kernel_tex_fetch(__objects, object).primitive_type;
+ payload.type = kernel_data_fetch(objects, object).primitive_type;
}
return result;
}
@@ -414,7 +414,7 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif
@@ -495,8 +495,8 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
@@ -526,8 +526,8 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
@@ -557,8 +557,8 @@ __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buff
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
@@ -585,8 +585,8 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
- uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
BoundingBoxIntersectionResult result;
result.accept = false;
@@ -620,7 +620,7 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif
@@ -701,8 +701,8 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
- const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
- const int type = kernel_tex_fetch(__objects, object).primitive_type;
+ const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+ const int type = kernel_data_fetch(objects, object).primitive_type;
BoundingBoxIntersectionResult result;
result.accept = false;
@@ -730,8 +730,8 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
const float3 ray_direction [[direction]],
const float ray_tmax [[max_distance]])
{
- const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
- const int type = kernel_tex_fetch(__objects, object).primitive_type;
+ const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+ const int type = kernel_data_fetch(objects, object).primitive_type;
BoundingBoxIntersectionResult result;
result.accept = false;
diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h
index bb752c531f0..7af2e421378 100644
--- a/intern/cycles/kernel/device/optix/globals.h
+++ b/intern/cycles/kernel/device/optix/globals.h
@@ -28,21 +28,21 @@ struct KernelParamsOptiX {
/* Global scene data and textures */
KernelData data;
-#define KERNEL_TEX(type, name) const type *name;
-#include "kernel/textures.h"
+#define KERNEL_DATA_ARRAY(type, name) const type *name;
+#include "kernel/data_arrays.h"
/* Integrator state */
- IntegratorStateGPU __integrator_state;
+ IntegratorStateGPU integrator_state;
};
#ifdef __NVCC__
-extern "C" static __constant__ KernelParamsOptiX __params;
+extern "C" static __constant__ KernelParamsOptiX kernel_params;
#endif
/* Abstraction macros */
-#define kernel_data __params.data
-#define kernel_tex_array(t) __params.t
-#define kernel_tex_fetch(t, index) __params.t[(index)]
-#define kernel_integrator_state __params.__integrator_state
+#define kernel_data kernel_params.data
+#define kernel_data_array(name) kernel_params.name
+#define kernel_data_fetch(name, index) kernel_params.name[(index)]
+#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 9843b2e99be..949bf41d171 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -51,15 +51,15 @@ ccl_device_forceinline int get_object_id()
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+ const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
- integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
+ integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+ const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_shadow(nullptr, path_index);
}
@@ -67,7 +67,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+ const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_subsurface(nullptr, path_index);
}
@@ -75,7 +75,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurfac
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+ const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_volume_stack(nullptr, path_index);
}
@@ -151,17 +151,17 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
isect->t = optixGetRayTmax();
isect->prim = prim;
isect->object = get_object_id();
- isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type;
+ isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
const float2 barycentrics = optixGetTriangleBarycentrics();
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
/* Record geometric normal. */
- const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
- const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0);
- const float3 tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1);
- const float3 tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
+ const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
+ const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
+ const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
/* Continue tracing (without this the trace call would return after the first hit). */
@@ -176,7 +176,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
const uint object = get_object_id();
# ifdef __VISIBILITY_FLAG__
const uint visibility = optixGetPayload_4();
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return optixIgnoreIntersection();
}
# endif
@@ -192,14 +192,14 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
const float2 barycentrics = optixGetTriangleBarycentrics();
u = 1.0f - barycentrics.y - barycentrics.x;
v = barycentrics.x;
- type = kernel_tex_fetch(__objects, object).primitive_type;
+ type = kernel_data_fetch(objects, object).primitive_type;
}
# ifdef __HAIR__
else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
u = __uint_as_float(optixGetAttribute_0());
v = __uint_as_float(optixGetAttribute_1());
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
@@ -212,7 +212,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
}
# endif
else {
- type = kernel_tex_fetch(__objects, object).primitive_type;
+ type = kernel_data_fetch(objects, object).primitive_type;
u = 0.0f;
v = 0.0f;
}
@@ -307,12 +307,12 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test()
const uint object = get_object_id();
#ifdef __VISIBILITY_FLAG__
const uint visibility = optixGetPayload_4();
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return optixIgnoreIntersection();
}
#endif
- if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
+ if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
return optixIgnoreIntersection();
}
@@ -340,7 +340,7 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
const uint object = get_object_id();
const uint visibility = optixGetPayload_4();
#ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return optixIgnoreIntersection();
}
#endif
@@ -377,10 +377,10 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
optixSetPayload_2(__float_as_uint(barycentrics.x));
optixSetPayload_3(prim);
- optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
+ optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
}
else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
optixSetPayload_2(optixGetAttribute_1());
optixSetPayload_3(segment.prim);
@@ -390,7 +390,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_1(0);
optixSetPayload_2(0);
optixSetPayload_3(prim);
- optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
+ optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
}
}
@@ -401,7 +401,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
# ifdef __VISIBILITY_FLAG__
const uint visibility = optixGetPayload_4();
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif
@@ -436,7 +436,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
extern "C" __global__ void __intersection__curve_ribbon()
{
- const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, optixGetPrimitiveIndex());
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
const int prim = segment.prim;
const int type = segment.type;
if (type & PRIMITIVE_CURVE_RIBBON) {
@@ -451,11 +451,11 @@ extern "C" __global__ void __intersection__point()
{
const int prim = optixGetPrimitiveIndex();
const int object = get_object_id();
- const int type = kernel_tex_fetch(__objects, object).primitive_type;
+ const int type = kernel_data_fetch(objects, object).primitive_type;
# ifdef __VISIBILITY_FLAG__
const uint visibility = optixGetPayload_4();
- if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
return;
}
# endif
diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
index e2c5d2ff024..41e6224f6da 100644
--- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
+++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
@@ -11,7 +11,15 @@
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytrace()
{
const int global_index = optixGetLaunchIndex().x;
- const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+ const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
- integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer);
+ integrator_shade_surface_raytrace(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee()
+{
+ const int global_index = optixGetLaunchIndex().x;
+ const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
+ global_index;
+ integrator_shade_surface_mnee(nullptr, path_index, kernel_params.render_buffer);
}