From e6b38deb9dbb58118f6ee644409ce52f06eac5e5 Mon Sep 17 00:00:00 2001
From: Patrick Mours <pmours@nvidia.com>
Date: Wed, 9 Nov 2022 14:25:32 +0100
Subject: Cycles: Add basic support for using OSL with OptiX

This patch generalizes the OSL support in Cycles to include GPU
device types and adds an implementation for that in the OptiX
device. There are some caveats still, including simplified texturing
due to lack of OIIO on the GPU and a few missing OSL intrinsics.

Note that this is incomplete and still needs an update to the OSL
library before it can be enabled! The implementation is being
committed now to simplify further development.

Maniphest Tasks: T101222

Differential Revision: https://developer.blender.org/D15902
---
 intern/cycles/kernel/device/optix/compat.h | 31 ++++++++----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

(limited to 'intern/cycles/kernel/device/optix/compat.h')

diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index 1a11a533b7e..e13101f57b8 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -33,14 +33,16 @@ typedef unsigned long long uint64_t;
 #endif
 
 #define ccl_device \
-  __device__ __forceinline__  // Function calls are bad for OptiX performance, so inline everything
+  static __device__ \
+      __forceinline__  // Function calls are bad for OptiX performance, so inline everything
+#define ccl_device_extern extern "C" __device__
 #define ccl_device_inline ccl_device
 #define ccl_device_forceinline ccl_device
-#define ccl_device_inline_method ccl_device
-#define ccl_device_noinline __device__ __noinline__
+#define ccl_device_inline_method __device__ __forceinline__
+#define ccl_device_noinline static __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_global
-#define ccl_inline_constant __constant__
+#define ccl_inline_constant static __constant__
 #define ccl_device_constant __constant__ __device__
 #define ccl_constant const
 #define ccl_gpu_shared __shared__
@@ -57,23 +59,6 @@ typedef unsigned long long uint64_t;
 
 #define kernel_assert(cond)
 
-/* GPU thread, block, grid size and index */
-
-#define ccl_gpu_thread_idx_x (threadIdx.x)
-#define ccl_gpu_block_dim_x (blockDim.x)
-#define ccl_gpu_block_idx_x (blockIdx.x)
-#define ccl_gpu_grid_dim_x (gridDim.x)
-#define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
-#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
-
-/* GPU warp synchronization. */
-
-#define ccl_gpu_syncthreads() __syncthreads()
-#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
-
 /* GPU texture objects */
 
 typedef unsigned long long CUtexObject;
@@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
 
 typedef unsigned short half;
 
-__device__ half __float2half(const float f)
+ccl_device_forceinline half __float2half(const float f)
 {
   half val;
   asm("{  cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
   return val;
 }
 
-__device__ float __half2float(const half h)
+ccl_device_forceinline float __half2float(const half h)
 {
   float val;
   asm("{  cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
-- 
cgit v1.2.3