Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_opencl.cpp54
-rw-r--r--intern/cycles/kernel/CMakeLists.txt23
-rw-r--r--intern/cycles/kernel/kernel.cl82
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h2
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h15
-rw-r--r--intern/cycles/kernel/kernel_globals.h200
-rw-r--r--intern/cycles/kernel/kernel_light.h4
-rw-r--r--intern/cycles/kernel/kernel_textures.h153
-rw-r--r--intern/cycles/kernel/kernel_triangle.h50
-rw-r--r--intern/cycles/kernel/kernel_types.h21
-rw-r--r--intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h4
-rw-r--r--intern/cycles/kernel/svm/bsdf_diffuse.h8
-rw-r--r--intern/cycles/kernel/svm/bsdf_microfacet.h16
-rw-r--r--intern/cycles/kernel/svm/bsdf_ward.h4
-rw-r--r--intern/cycles/kernel/svm/bsdf_westin.h8
-rw-r--r--intern/cycles/kernel/svm/svm_blend.h2
-rw-r--r--intern/cycles/kernel/svm/svm_displace.h5
-rw-r--r--intern/cycles/kernel/svm/svm_distorted_noise.h5
-rw-r--r--intern/cycles/kernel/svm/svm_image.h18
-rw-r--r--intern/cycles/kernel/svm/svm_mix.h26
-rw-r--r--intern/cycles/kernel/svm/svm_sky.h2
-rw-r--r--intern/cycles/kernel/svm/svm_texture.h2
-rw-r--r--intern/cycles/kernel/svm/svm_types.h80
-rw-r--r--intern/cycles/util/util_color.h4
-rw-r--r--intern/cycles/util/util_math.h6
25 files changed, 448 insertions, 346 deletions
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 6b564d10e78..ef416dfb8dc 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -55,6 +55,7 @@ public:
cl_int ciErr;
map<string, device_vector<uchar>*> const_mem_map;
map<string, device_memory*> mem_map;
+ device_ptr null_mem;
const char *opencl_error_string(cl_int err)
{
@@ -125,10 +126,10 @@ public:
ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
opencl_assert(ciErr);
- ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL);
+ ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &cdDevice, NULL);
opencl_assert(ciErr);
- cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL /*clLogMessagesToStdoutAPPLE */, NULL, &ciErr);
+ cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
opencl_assert(ciErr);
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr);
@@ -137,10 +138,16 @@ public:
/* compile kernel */
string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
size_t source_len = source.size();
- string build_options = "-I ../kernel -I ../util -Werror -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; //" + path_get("kernel") + " -Werror";
- //printf("path %s\n", path_get("kernel").c_str());
- //clUnloadCompiler();
+ string build_options = "";
+
+ //string csource = "../blender/intern/cycles";
+ //build_options += "-I " + csource + "/kernel -I " + csource + "/util";
+
+ build_options += " -I " + path_get("kernel"); /* todo: escape path */
+
+ build_options += " -Werror";
+ build_options += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END=";
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr);
@@ -170,10 +177,15 @@ public:
opencl_assert(ciErr);
ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
opencl_assert(ciErr);
+
+ null_mem = (device_ptr)clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
}
~OpenCLDevice()
{
+
+ clReleaseMemObject(CL_MEM_PTR(null_mem));
+
map<string, device_vector<uchar>*>::iterator mt;
for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
mem_free(*(mt->second));
@@ -261,6 +273,7 @@ public:
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
mem_alloc(mem, MEM_READ_ONLY);
+ mem_copy_to(mem);
mem_map[name] = &mem;
}
@@ -295,6 +308,11 @@ public:
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
+
+#define KERNEL_TEX(type, ttype, name) \
+ ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
+#include "kernel_textures.h"
+
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
@@ -314,10 +332,20 @@ public:
cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
{
- device_memory *mem = mem_map[name];
- cl_mem ptr = CL_MEM_PTR(mem->device_pointer);
- cl_int size = mem->data_width;
- cl_int err = 0;
+ cl_mem ptr;
+ cl_int size, err = 0;
+
+ if(mem_map.find(name) != mem_map.end()) {
+ device_memory *mem = mem_map[name];
+
+ ptr = CL_MEM_PTR(mem->device_pointer);
+ size = mem->data_width;
+ }
+ else {
+ /* work around NULL not working, even though the spec says otherwise */
+ ptr = CL_MEM_PTR(null_mem);
+ size = 1;
+ }
err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
opencl_assert(err);
@@ -347,9 +375,11 @@ public:
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_R");
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_G");
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_B");
+
+#define KERNEL_TEX(type, ttype, name) \
+ ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
+#include "kernel_textures.h"
+
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f1aa1db9e8c..bc1f8bd40a5 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -25,8 +25,11 @@ SET(headers
kernel_qbvh.h
kernel_random.h
kernel_shader.h
+ kernel_textures.h
kernel_triangle.h
- kernel_types.h
+ kernel_types.h)
+
+SET(svm_headers
svm/bsdf.h
svm/bsdf_ashikhmin_velvet.h
svm/bsdf_diffuse.h
@@ -78,7 +81,7 @@ ELSE()
ENDIF()
IF(WITH_CYCLES_CUDA)
- SET(cuda_sources kernel.cu ${headers})
+ SET(cuda_sources kernel.cu ${headers} ${svm_headers})
SET(cuda_cubins)
FOREACH(arch ${CYCLES_CUDA_ARCH})
@@ -106,9 +109,23 @@ ENDIF()
INCLUDE_DIRECTORIES(. ../util osl svm)
-ADD_LIBRARY(cycles_kernel ${sources} ${headers})
+ADD_LIBRARY(cycles_kernel ${sources} ${headers} ${svm_headers})
IF(WITH_CYCLES_CUDA)
ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda)
ENDIF()
+# OPENCL kernel
+
+IF(WITH_CYCLES_OPENCL)
+ SET(util_headers
+ ../util/util_color.h
+ ../util/util_math.h
+ ../util/util_transform.h
+ ../util/util_types.h)
+
+ INSTALL(FILES kernel.cl ${headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+ INSTALL(FILES ${svm_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel/svm)
+ INSTALL(FILES ${util_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+ENDIF()
+
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index a22db5fe040..e1a9b3a0696 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -23,71 +23,61 @@
#include "kernel_types.h"
#include "kernel_globals.h"
-typedef struct KernelGlobals {
- __constant KernelData *data;
-
- __global float *__response_curve_R;
- int __response_curve_R_width;
-
- __global float *__response_curve_G;
- int __response_curve_G_width;
-
- __global float *__response_curve_B;
- int __response_curve_B_width;
-} KernelGlobals;
-
#include "kernel_film.h"
-//#include "kernel_path.h"
+#include "kernel_path.h"
//#include "kernel_displace.h"
-__kernel void kernel_ocl_path_trace(__constant KernelData *data, __global float4 *buffer, __global uint *rng_state, int pass, int sx, int sy, int sw, int sh)
+__kernel void kernel_ocl_path_trace(
+ __constant KernelData *data,
+ __global float4 *buffer,
+ __global uint *rng_state,
+
+#define KERNEL_TEX(type, ttype, name) \
+ __global type *name, \
+ int name##_width,
+#include "kernel_textures.h"
+
+ int pass,
+ int sx, int sy, int sw, int sh)
{
KernelGlobals kglobals, *kg = &kglobals;
+
kg->data = data;
- int x = get_global_id(0);
- int y = get_global_id(1);
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name; \
+ kg->name##_width = name##_width;
+#include "kernel_textures.h"
+
+ int x = sx + get_global_id(0);
+ int y = sy + get_global_id(1);
int w = kernel_data.cam.width;
- if(x < sx + sw && y < sy + sh) {
- if(pass == 0) {
- buffer[x + w*y].x = 0.5f;
- buffer[x + w*y].y = 0.5f;
- buffer[x + w*y].z = 0.5f;
- }
- else {
- buffer[x + w*y].x += 0.5f;
- buffer[x + w*y].y += 0.5f;
- buffer[x + w*y].z += 0.5f;
- }
-
- //= make_float3(1.0f, 0.9f, 0.0f);
- //kernel_path_trace(buffer, rng_state, pass, x, y);
- }
+ if(x < sx + sw && y < sy + sh)
+ kernel_path_trace(kg, buffer, rng_state, pass, x, y);
}
__kernel void kernel_ocl_tonemap(
__constant KernelData *data,
__global uchar4 *rgba,
__global float4 *buffer,
- __global float *__response_curve_R,
- int __response_curve_R_width,
- __global float *__response_curve_G,
- int __response_curve_G_width,
- __global float *__response_curve_B,
- int __response_curve_B_width,
+
+#define KERNEL_TEX(type, ttype, name) \
+ __global type *name, \
+ int name##_width,
+#include "kernel_textures.h"
+
int pass, int resolution,
int sx, int sy, int sw, int sh)
{
KernelGlobals kglobals, *kg = &kglobals;
kg->data = data;
- kg->__response_curve_R = __response_curve_R;
- kg->__response_curve_R_width = __response_curve_R_width;
- kg->__response_curve_G = __response_curve_G;
- kg->__response_curve_G_width = __response_curve_G_width;
- kg->__response_curve_B = __response_curve_B;
- kg->__response_curve_B_width = __response_curve_B_width;
+
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name; \
+ kg->name##_width = name##_width;
+#include "kernel_textures.h"
int x = sx + get_global_id(0);
int y = sy + get_global_id(1);
@@ -96,10 +86,10 @@ __kernel void kernel_ocl_tonemap(
kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y);
}
-__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
+/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
{
int x = sx + get_global_id(0);
kernel_displace(input, offset, x);
-}
+}*/
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 99c1df1fb1a..b7b29d46323 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
#define __device_inline __device__ __inline__
#define __global
#define __shared __shared__
-#define __constant __constant__
+#define __constant
/* No assert supported for CUDA */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 16ddca5305b..e6e54850605 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -29,6 +29,8 @@ CCL_NAMESPACE_BEGIN
#define __device
#define __device_inline
+#define kernel_assert(cond)
+
__device float kernel_tex_interp_(__global float *data, int width, float x)
{
x = clamp(x, 0.0f, 1.0f)*width;
@@ -40,9 +42,20 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
return (1.0f - t)*data[index] + t*data[nindex];
}
+#define make_float3(x, y, z) ((float3)(x, y, z)) /* todo 1.1 */
+
+#define __uint_as_float(x) as_float(x)
+#define __float_as_uint(x) as_uint(x)
+#define __int_as_float(x) as_float(x)
+#define __float_as_int(x) as_int(x)
+
#define kernel_data (*kg->data)
#define kernel_tex_interp(t, x) \
- kernel_tex_interp_(kg->t, kg->t##_width, x);
+ kernel_tex_interp_(kg->t, kg->t##_width, x)
+#define kernel_tex_fetch(t, index) \
+ kg->t[index]
+
+#define NULL 0
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index 4c4f35bb508..720a9f28fa1 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -18,190 +18,66 @@
/* Constant Globals */
-#ifdef __KERNEL_CPU__
-
-#ifdef WITH_OSL
-#include "osl_globals.h"
-#endif
-
CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
the kernel, to access constant data. These are all stored as "textures", but
these are really just standard arrays. We can't use actually globals because
multiple renders may be running inside the same process. */
-typedef struct KernelGlobals {
-#else
+#ifdef __KERNEL_CPU__
-/* On the GPU, constant memory textures must be globals, so we can't put them
- into a struct. As a result we don't actually use this struct and use actual
- globals and simply pass along a NULL pointer everywhere, which we hope gets
- optimized out. */
-#ifdef __KERNEL_CUDA__
-typedef struct KernelGlobals {} KernelGlobals;
+#ifdef WITH_OSL
+//#include "osl_globals.h"
#endif
+typedef struct KernelGlobals {
+
+#define KERNEL_TEX(type, ttype, name) ttype name;
+#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
+#include "kernel_textures.h"
+
+ KernelData __data;
+
+#ifdef WITH_OSL
+ /* On the CPU, we also have the OSL globals here. Most data structures are shared
+ with SVM, the difference is in the shaders and object/mesh attributes. */
+ //OSLGlobals osl;
#endif
-/* globals */
-__constant KernelData __data;
-
-#ifndef __KERNEL_OPENCL__
-
-/* bvh */
-texture_float4 __bvh_nodes;
-texture_float4 __tri_woop;
-texture_uint __prim_index;
-texture_uint __prim_object;
-texture_uint __object_node;
-
-/* objects */
-texture_float4 __objects;
-
-/* triangles */
-texture_float4 __tri_normal;
-texture_float4 __tri_vnormal;
-texture_float4 __tri_vindex;
-texture_float4 __tri_verts;
-
-/* attributes */
-texture_uint4 __attributes_map;
-texture_float __attributes_float;
-texture_float4 __attributes_float3;
-
-/* lights */
-texture_float4 __light_distribution;
-texture_float4 __light_point;
-
-/* shaders */
-texture_uint4 __svm_nodes;
-
-/* camera/film */
-texture_float __filter_table;
-texture_float __response_curve_R;
-texture_float __response_curve_G;
-texture_float __response_curve_B;
-
-/* sobol */
-texture_uint __sobol_directions;
-
-/* image */
-texture_image_uchar4 __tex_image_000;
-texture_image_uchar4 __tex_image_001;
-texture_image_uchar4 __tex_image_002;
-texture_image_uchar4 __tex_image_003;
-texture_image_uchar4 __tex_image_004;
-texture_image_uchar4 __tex_image_005;
-texture_image_uchar4 __tex_image_006;
-texture_image_uchar4 __tex_image_007;
-texture_image_uchar4 __tex_image_008;
-texture_image_uchar4 __tex_image_009;
-texture_image_uchar4 __tex_image_010;
-texture_image_uchar4 __tex_image_011;
-texture_image_uchar4 __tex_image_012;
-texture_image_uchar4 __tex_image_013;
-texture_image_uchar4 __tex_image_014;
-texture_image_uchar4 __tex_image_015;
-texture_image_uchar4 __tex_image_016;
-texture_image_uchar4 __tex_image_017;
-texture_image_uchar4 __tex_image_018;
-texture_image_uchar4 __tex_image_019;
-texture_image_uchar4 __tex_image_020;
-texture_image_uchar4 __tex_image_021;
-texture_image_uchar4 __tex_image_022;
-texture_image_uchar4 __tex_image_023;
-texture_image_uchar4 __tex_image_024;
-texture_image_uchar4 __tex_image_025;
-texture_image_uchar4 __tex_image_026;
-texture_image_uchar4 __tex_image_027;
-texture_image_uchar4 __tex_image_028;
-texture_image_uchar4 __tex_image_029;
-texture_image_uchar4 __tex_image_030;
-texture_image_uchar4 __tex_image_031;
-texture_image_uchar4 __tex_image_032;
-texture_image_uchar4 __tex_image_033;
-texture_image_uchar4 __tex_image_034;
-texture_image_uchar4 __tex_image_035;
-texture_image_uchar4 __tex_image_036;
-texture_image_uchar4 __tex_image_037;
-texture_image_uchar4 __tex_image_038;
-texture_image_uchar4 __tex_image_039;
-texture_image_uchar4 __tex_image_040;
-texture_image_uchar4 __tex_image_041;
-texture_image_uchar4 __tex_image_042;
-texture_image_uchar4 __tex_image_043;
-texture_image_uchar4 __tex_image_044;
-texture_image_uchar4 __tex_image_045;
-texture_image_uchar4 __tex_image_046;
-texture_image_uchar4 __tex_image_047;
-texture_image_uchar4 __tex_image_048;
-texture_image_uchar4 __tex_image_049;
-texture_image_uchar4 __tex_image_050;
-texture_image_uchar4 __tex_image_051;
-texture_image_uchar4 __tex_image_052;
-texture_image_uchar4 __tex_image_053;
-texture_image_uchar4 __tex_image_054;
-texture_image_uchar4 __tex_image_055;
-texture_image_uchar4 __tex_image_056;
-texture_image_uchar4 __tex_image_057;
-texture_image_uchar4 __tex_image_058;
-texture_image_uchar4 __tex_image_059;
-texture_image_uchar4 __tex_image_060;
-texture_image_uchar4 __tex_image_061;
-texture_image_uchar4 __tex_image_062;
-texture_image_uchar4 __tex_image_063;
-texture_image_uchar4 __tex_image_064;
-texture_image_uchar4 __tex_image_065;
-texture_image_uchar4 __tex_image_066;
-texture_image_uchar4 __tex_image_067;
-texture_image_uchar4 __tex_image_068;
-texture_image_uchar4 __tex_image_069;
-texture_image_uchar4 __tex_image_070;
-texture_image_uchar4 __tex_image_071;
-texture_image_uchar4 __tex_image_072;
-texture_image_uchar4 __tex_image_073;
-texture_image_uchar4 __tex_image_074;
-texture_image_uchar4 __tex_image_075;
-texture_image_uchar4 __tex_image_076;
-texture_image_uchar4 __tex_image_077;
-texture_image_uchar4 __tex_image_078;
-texture_image_uchar4 __tex_image_079;
-texture_image_uchar4 __tex_image_080;
-texture_image_uchar4 __tex_image_081;
-texture_image_uchar4 __tex_image_082;
-texture_image_uchar4 __tex_image_083;
-texture_image_uchar4 __tex_image_084;
-texture_image_uchar4 __tex_image_085;
-texture_image_uchar4 __tex_image_086;
-texture_image_uchar4 __tex_image_087;
-texture_image_uchar4 __tex_image_088;
-texture_image_uchar4 __tex_image_089;
-texture_image_uchar4 __tex_image_090;
-texture_image_uchar4 __tex_image_091;
-texture_image_uchar4 __tex_image_092;
-texture_image_uchar4 __tex_image_093;
-texture_image_uchar4 __tex_image_094;
-texture_image_uchar4 __tex_image_095;
-texture_image_uchar4 __tex_image_096;
-texture_image_uchar4 __tex_image_097;
-texture_image_uchar4 __tex_image_098;
-texture_image_uchar4 __tex_image_099;
+} KernelGLobals;
#endif
-#ifdef __KERNEL_CPU__
+/* For CUDA, constant memory textures must be globals, so we can't put them
+ into a struct. As a result we don't actually use this struct and use actual
+ globals and simply pass along a NULL pointer everywhere, which we hope gets
+ optimized out. */
-#ifdef WITH_OSL
+#ifdef __KERNEL_CUDA__
-/* On the CPU, we also have the OSL globals here. Most data structures are shared
- with SVM, the difference is in the shaders and object/mesh attributes. */
+__constant__ KernelData __data;
+typedef struct KernelGlobals {} KernelGlobals;
-OSLGlobals osl;
+#define KERNEL_TEX(type, ttype, name) ttype name;
+#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
+#include "kernel_textures.h"
#endif
+/* OpenCL */
+
+#ifdef __KERNEL_OPENCL__
+
+typedef struct KernelGlobals {
+ __constant KernelData *data;
+
+#define KERNEL_TEX(type, ttype, name) \
+ __global type *name; \
+ int name##_width;
+#include "kernel_textures.h"
} KernelGlobals;
+
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 537f7ea00f0..5164e5deea2 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -18,14 +18,14 @@
CCL_NAMESPACE_BEGIN
-struct LightSample {
+typedef struct LightSample {
float3 P;
float3 Ng;
int object;
int prim;
int shader;
float weight;
-};
+} LightSample;
/* Point Light */
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
new file mode 100644
index 00000000000..bd44ed7eee5
--- /dev/null
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -0,0 +1,153 @@
+
+#ifndef KERNEL_TEX
+#define KERNEL_TEX(type, ttype, name)
+#endif
+
+#ifndef KERNEL_IMAGE_TEX
+#define KERNEL_IMAGE_TEX(type, ttype, name)
+#endif
+
+
+/* bvh */
+KERNEL_TEX(float4, texture_float4, __bvh_nodes)
+KERNEL_TEX(float4, texture_float4, __tri_woop)
+KERNEL_TEX(uint, texture_uint, __prim_index)
+KERNEL_TEX(uint, texture_uint, __prim_object)
+KERNEL_TEX(uint, texture_uint, __object_node)
+
+/* objects */
+KERNEL_TEX(float4, texture_float4, __objects)
+
+/* triangles */
+KERNEL_TEX(float4, texture_float4, __tri_normal)
+KERNEL_TEX(float4, texture_float4, __tri_vnormal)
+KERNEL_TEX(float4, texture_float4, __tri_vindex)
+KERNEL_TEX(float4, texture_float4, __tri_verts)
+
+/* attributes */
+KERNEL_TEX(uint4, texture_uint4, __attributes_map)
+KERNEL_TEX(float, texture_float, __attributes_float)
+KERNEL_TEX(float4, texture_float4, __attributes_float3)
+
+/* lights */
+KERNEL_TEX(float4, texture_float4, __light_distribution)
+KERNEL_TEX(float4, texture_float4, __light_point)
+
+/* shaders */
+KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
+
+/* camera/film */
+KERNEL_TEX(float, texture_float, __filter_table)
+KERNEL_TEX(float, texture_float, __response_curve_R)
+KERNEL_TEX(float, texture_float, __response_curve_G)
+KERNEL_TEX(float, texture_float, __response_curve_B)
+
+/* sobol */
+KERNEL_TEX(uint, texture_uint, __sobol_directions)
+
+/* image */
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_000)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_001)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_002)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_003)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_004)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_007)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_008)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_009)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_010)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_011)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_012)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_013)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_014)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_015)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_016)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_017)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_018)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_019)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_020)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_021)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_022)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_023)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_024)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_025)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_026)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_027)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_028)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_029)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_030)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_031)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_032)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_033)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_034)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_035)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_036)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_037)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_038)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_039)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_040)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_041)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_042)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_043)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_044)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_045)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_046)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_047)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_048)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_049)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_050)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_051)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_052)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_053)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_054)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_055)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_056)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_057)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_058)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_059)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_060)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_061)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_062)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_063)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_064)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_065)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_066)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_067)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_068)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_069)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_070)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_071)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_072)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_073)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_074)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_075)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_076)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_077)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_078)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_079)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_080)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_081)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_082)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_083)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_084)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_085)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_086)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_087)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_088)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_089)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_090)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_091)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_092)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_093)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_094)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_095)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_096)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
+
+#undef KERNEL_TEX
+#undef KERNEL_IMAGE_TEX
+
+
diff --git a/intern/cycles/kernel/kernel_triangle.h b/intern/cycles/kernel/kernel_triangle.h
index ddf8b7b1caf..7eaf54d14bf 100644
--- a/intern/cycles/kernel/kernel_triangle.h
+++ b/intern/cycles/kernel/kernel_triangle.h
@@ -22,11 +22,11 @@ CCL_NAMESPACE_BEGIN
__device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v)
{
/* load triangle vertices */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
- float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute point */
float t = 1.0f - u - v;
@@ -50,11 +50,11 @@ __device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int
{
#if 0
/* load triangle vertices */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
- float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute normal */
return normalize(cross(v2 - v0, v1 - v0));
@@ -68,11 +68,11 @@ __device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int
__device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v)
{
/* load triangle vertices */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
- float3 n0 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
- float3 n1 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
- float3 n2 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
+ float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
+ float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
+ float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
return normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
}
@@ -80,11 +80,11 @@ __device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index,
__device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri)
{
/* fetch triangle vertex coordinates */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri));
- float3 p0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 p1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 p2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute derivatives of P w.r.t. uv */
*dPdu = (p0 - p2);
@@ -102,7 +102,7 @@ __device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd,
return kernel_tex_fetch(__attributes_float, offset + sd->prim);
}
else if(elem == ATTR_ELEMENT_VERTEX) {
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
@@ -142,14 +142,14 @@ __device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *s
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return as_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
}
else if(elem == ATTR_ELEMENT_VERTEX) {
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
- float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
- float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
- float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
@@ -160,9 +160,9 @@ __device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *s
}
else if(elem == ATTR_ELEMENT_CORNER) {
int tri = offset + sd->prim*3;
- float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
- float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
- float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index bd337eb95e1..fbe827b1791 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -21,11 +21,7 @@
#include "kernel_math.h"
-#ifndef __KERNEL_OPENCL__
-
-#include "svm_types.h"
-
-#endif
+#include "svm/svm_types.h"
CCL_NAMESPACE_BEGIN
@@ -239,9 +235,7 @@ typedef struct ShaderData {
/* SVM closure data. we always sample a single closure, to get fixed
* memory usage, svm_closure_data contains closure parameters. */
-#ifndef __KERNEL_OPENCL__
ClosureType svm_closure;
-#endif
float3 svm_closure_weight;
float svm_closure_data[3]; /* CUDA gives compile error if out of bounds */
@@ -291,11 +285,15 @@ typedef struct KernelCamera {
float shutterclose;
/* differentials */
- float3 dx, dy;
+ float3 dx;
+ float pad1;
+ float3 dy;
+ float pad2;
/* clipping */
float nearclip;
float cliplength;
+ float pad3, pad4;
/* more matrices */
Transform screentoworld;
@@ -321,13 +319,14 @@ typedef struct KernelBackground {
typedef struct KernelSunSky {
/* sun direction in spherical and cartesian */
- float theta, phi;
+ float theta, phi, pad3, pad4;
float3 dir;
float pad;
/* perez function parameters */
- float zenith_Y, zenith_x, zenith_y;
+ float zenith_Y, zenith_x, zenith_y, pad2;
float perez_Y[5], perez_x[5], perez_y[5];
+ float pad5;
} KernelSunSky;
typedef struct KernelIntegrator {
@@ -348,7 +347,7 @@ typedef struct KernelIntegrator {
float blur_caustics;
/* padding */
- int pad;
+ int pad[2];
} KernelIntegrator;
typedef struct KernelBVH {
diff --git a/intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h
index 40bae72a6c5..04e4ccb8313 100644
--- a/intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h
@@ -139,8 +139,8 @@ __device int bsdf_ashikhmin_velvet_sample(const ShaderData *sd, float randu, flo
// TODO: find a better approximation for the retroreflective bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= 125;
- *domega_in_dy *= 125;
+ *domega_in_dx *= 125.0f;
+ *domega_in_dy *= 125.0f;
#endif
} else
*pdf = 0.0f;
diff --git a/intern/cycles/kernel/svm/bsdf_diffuse.h b/intern/cycles/kernel/svm/bsdf_diffuse.h
index c505de036aa..00493e72203 100644
--- a/intern/cycles/kernel/svm/bsdf_diffuse.h
+++ b/intern/cycles/kernel/svm/bsdf_diffuse.h
@@ -88,8 +88,8 @@ __device int bsdf_diffuse_sample(const ShaderData *sd, float randu, float randv,
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= 125;
- *domega_in_dy *= 125;
+ *domega_in_dx *= 125.0f;
+ *domega_in_dy *= 125.0f;
#endif
}
else
@@ -151,8 +151,8 @@ __device int bsdf_translucent_sample(const ShaderData *sd, float randu, float ra
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= -125;
- *domega_in_dy *= -125;
+ *domega_in_dx *= -125.0f;
+ *domega_in_dy *= -125.0f;
#endif
} else
*pdf = 0;
diff --git a/intern/cycles/kernel/svm/bsdf_microfacet.h b/intern/cycles/kernel/svm/bsdf_microfacet.h
index b6baa1e90d8..a948ba06871 100644
--- a/intern/cycles/kernel/svm/bsdf_microfacet.h
+++ b/intern/cycles/kernel/svm/bsdf_microfacet.h
@@ -195,8 +195,8 @@ __device int bsdf_microfacet_ggx_sample(const ShaderData *sd, float randu, float
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
@@ -246,8 +246,8 @@ __device int bsdf_microfacet_ggx_sample(const ShaderData *sd, float randu, float
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
@@ -423,8 +423,8 @@ __device int bsdf_microfacet_beckmann_sample(const ShaderData *sd, float randu,
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
@@ -478,8 +478,8 @@ __device int bsdf_microfacet_beckmann_sample(const ShaderData *sd, float randu,
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
diff --git a/intern/cycles/kernel/svm/bsdf_ward.h b/intern/cycles/kernel/svm/bsdf_ward.h
index bf591acc9fa..9f857b32468 100644
--- a/intern/cycles/kernel/svm/bsdf_ward.h
+++ b/intern/cycles/kernel/svm/bsdf_ward.h
@@ -187,8 +187,8 @@ __device int bsdf_ward_sample(const ShaderData *sd, float randu, float randv, fl
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
diff --git a/intern/cycles/kernel/svm/bsdf_westin.h b/intern/cycles/kernel/svm/bsdf_westin.h
index 7fe10f10dfc..6031012d0ca 100644
--- a/intern/cycles/kernel/svm/bsdf_westin.h
+++ b/intern/cycles/kernel/svm/bsdf_westin.h
@@ -122,8 +122,8 @@ __device int bsdf_westin_backscatter_sample(const ShaderData *sd, float randu, f
// derivatives a bit bigger. In theory this varies with the
// exponent but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
@@ -198,8 +198,8 @@ __device int bsdf_westin_sheen_sample(const ShaderData *sd, float randu, float r
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= 125;
- *domega_in_dy *= 125;
+ *domega_in_dx *= 125.0f;
+ *domega_in_dy *= 125.0f;
#endif
} else
pdf = 0;
diff --git a/intern/cycles/kernel/svm/svm_blend.h b/intern/cycles/kernel/svm/svm_blend.h
index b1be7b7f6bc..97fa4aff9e7 100644
--- a/intern/cycles/kernel/svm/svm_blend.h
+++ b/intern/cycles/kernel/svm/svm_blend.h
@@ -41,7 +41,7 @@ __device float svm_blend(float3 p, NodeBlendType type, NodeBlendAxis axis)
return r*r;
}
else if(type == NODE_BLEND_EASING) {
- float r = min(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
+ float r = fminf(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
float t = r*r;
return (3.0f*t - 2.0f*t*r);
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index db8a8a13289..b1677f67eca 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -34,8 +34,9 @@ __device void svm_node_set_bump(ShaderData *sd, float *stack, uint c_offset, uin
float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
surfgrad *= 0.1f; /* todo: remove this factor */
-
- sd->N = normalize(fabsf(det)*sd->N - signf(det)*surfgrad);
+
+ float absdet = fabsf(det);
+ sd->N = normalize(absdet*sd->N - signf(det)*surfgrad);
#endif
}
diff --git a/intern/cycles/kernel/svm/svm_distorted_noise.h b/intern/cycles/kernel/svm/svm_distorted_noise.h
index 469313e377d..7518a3a9d2d 100644
--- a/intern/cycles/kernel/svm/svm_distorted_noise.h
+++ b/intern/cycles/kernel/svm/svm_distorted_noise.h
@@ -23,12 +23,13 @@ CCL_NAMESPACE_BEGIN
__device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
{
float3 r;
+ float3 offset = make_float3(13.5f, 13.5f, 13.5f);
p /= size;
- r.x = noise_basis(p + make_float3(13.5f, 13.5f, 13.5f), basis) * distortion;
+ r.x = noise_basis(p + offset, basis) * distortion;
r.y = noise_basis(p, basis) * distortion;
- r.z = noise_basis(p - make_float3(13.5f, 13.5f, 13.5f), basis) * distortion;
+ r.z = noise_basis(p - offset, basis) * distortion;
return noise_basis(p + r, distortion_basis); /* distorted-domain noise */
}
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 88f0b582442..586e35c6465 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -31,6 +31,9 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
also note that cuda has 128 textures limit, we use 100 now, since
we still need some for other storage */
+#ifdef __KERNEL_OPENCL__
+ r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
+#else
switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
@@ -136,6 +139,7 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
+#endif
return r;
}
@@ -151,8 +155,11 @@ __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack
float4 f = svm_image_texture(kg, id, co.x, co.y);
float3 r = make_float3(f.x, f.y, f.z);
- if(srgb)
- r = color_srgb_to_scene_linear(r);
+ if(srgb) {
+ r.x = color_srgb_to_scene_linear(r.x);
+ r.y = color_srgb_to_scene_linear(r.y);
+ r.z = color_srgb_to_scene_linear(r.z);
+ }
stack_store_float3(stack, out_offset, r);
}
@@ -170,8 +177,11 @@ __device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float
float4 f = svm_image_texture(kg, id, u, v);
float3 r = make_float3(f.x, f.y, f.z);
- if(srgb)
- r = color_srgb_to_scene_linear(r);
+ if(srgb) {
+ r.x = color_srgb_to_scene_linear(r.x);
+ r.y = color_srgb_to_scene_linear(r.y);
+ r.z = color_srgb_to_scene_linear(r.z);
+ }
stack_store_float3(stack, out_offset, r);
}
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 5a8ca2f76dd..c9e6cdf43b9 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -41,7 +41,8 @@ __device float3 rgb_to_hsv(float3 rgb)
h = 0.0f;
}
else {
- c = (make_float3(cmax, cmax, cmax) - rgb)/cdelta;
+ float3 cmax3 = make_float3(cmax, cmax, cmax);
+ c = (cmax3 - rgb)/cdelta;
if(rgb.x == cmax) h = c.z - c.y;
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
@@ -91,26 +92,33 @@ __device float3 hsv_to_rgb(float3 hsv)
return rgb;
}
+__device float3 svm_lerp(const float3 a, const float3 b, float t)
+{
+ return (a * (1.0f - t) + b * t);
+}
+
__device float3 svm_mix_blend(float t, float3 col1, float3 col2)
{
- return lerp(col1, col2, t);
+ return svm_lerp(col1, col2, t);
}
__device float3 svm_mix_add(float t, float3 col1, float3 col2)
{
- return lerp(col1, col1 + col2, t);
+ return svm_lerp(col1, col1 + col2, t);
}
__device float3 svm_mix_mul(float t, float3 col1, float3 col2)
{
- return lerp(col1, col1 * col2, t);
+ return svm_lerp(col1, col1 * col2, t);
}
__device float3 svm_mix_screen(float t, float3 col1, float3 col2)
{
float tm = 1.0f - t;
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 tm3 = make_float3(tm, tm, tm);
- return make_float3(1.0f, 1.0f, 1.0f) - (make_float3(tm, tm, tm) + t*(make_float3(1.0f, 1.0f, 1.0f) - col2))*(make_float3(1.0f, 1.0f, 1.0f) - col1);
+ return one - (tm3 + t*(one - col2))*(one - col1);
}
__device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
@@ -139,7 +147,7 @@ __device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
__device float3 svm_mix_sub(float t, float3 col1, float3 col2)
{
- return lerp(col1, col1 - col2, t);
+ return svm_lerp(col1, col1 - col2, t);
}
__device float3 svm_mix_div(float t, float3 col1, float3 col2)
@@ -157,7 +165,7 @@ __device float3 svm_mix_div(float t, float3 col1, float3 col2)
__device float3 svm_mix_diff(float t, float3 col1, float3 col2)
{
- return lerp(col1, fabs(col1 - col2), t);
+ return svm_lerp(col1, fabs(col1 - col2), t);
}
__device float3 svm_mix_dark(float t, float3 col1, float3 col2)
@@ -255,7 +263,7 @@ __device float3 svm_mix_hue(float t, float3 col1, float3 col2)
hsv.x = hsv2.x;
float3 tmp = hsv_to_rgb(hsv);
- outcol = lerp(outcol, tmp, t);
+ outcol = svm_lerp(outcol, tmp, t);
}
return outcol;
@@ -302,7 +310,7 @@ __device float3 svm_mix_color(float t, float3 col1, float3 col2)
hsv.y = hsv2.y;
float3 tmp = hsv_to_rgb(hsv);
- outcol = lerp(outcol, tmp, t);
+ outcol = svm_lerp(outcol, tmp, t);
}
return outcol;
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index dd02cb64cd7..eaba4d18365 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -49,7 +49,7 @@ __device float sky_angle_between(float thetav, float phiv, float theta, float ph
return safe_acosf(cospsi);
}
-__device float sky_perez_function(float lam[5], float theta, float gamma)
+__device float sky_perez_function(__constant float *lam, float theta, float gamma)
{
float ctheta = cosf(theta);
float cgamma = cosf(gamma);
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index c5ded6d975f..d4765cca384 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -69,7 +69,7 @@ __device void voronoi(float3 p, NodeDistanceMetric distance_metric, float e, flo
float3 pd = p - (vp + ip);
float d = voronoi_distance(distance_metric, pd, e);
- vp += make_float3((float)xx, (float)yy, (float)zz);
+ vp += ip;
if(d < da[0]) {
da[3] = da[2];
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index aa58aba79c9..769ccfc9bc0 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
* happens i have no idea, but consecutive values are problematic, maybe it
* generates an incorrect jump table. */
-enum NodeType {
+typedef enum NodeType {
NODE_END = 0,
NODE_CLOSURE_BSDF = 100,
NODE_CLOSURE_EMISSION = 200,
@@ -82,23 +82,23 @@ enum NodeType {
NODE_ATTR_BUMP_DX = 4400,
NODE_ATTR_BUMP_DY = 4500,
NODE_TEX_ENVIRONMENT = 4600
-};
+} NodeType;
-enum NodeAttributeType {
+typedef enum NodeAttributeType {
NODE_ATTR_FLOAT = 0,
NODE_ATTR_FLOAT3
-};
+} NodeAttributeType;
-enum NodeGeometry {
+typedef enum NodeGeometry {
NODE_GEOM_P = 0,
NODE_GEOM_N,
NODE_GEOM_T,
NODE_GEOM_I,
NODE_GEOM_Ng,
NODE_GEOM_uv
-};
+} NodeGeometry;
-enum NodeLightPath {
+typedef enum NodeLightPath {
NODE_LP_camera = 0,
NODE_LP_shadow,
NODE_LP_diffuse,
@@ -106,16 +106,16 @@ enum NodeLightPath {
NODE_LP_reflection,
NODE_LP_transmission,
NODE_LP_backfacing
-};
+} NodeLightPath;
-enum NodeTexCoord {
+typedef enum NodeTexCoord {
NODE_TEXCO_OBJECT,
NODE_TEXCO_CAMERA,
NODE_TEXCO_WINDOW,
NODE_TEXCO_REFLECTION
-};
+} NodeTexCoord;
-enum NodeMix {
+typedef enum NodeMix {
NODE_MIX_BLEND = 0,
NODE_MIX_ADD,
NODE_MIX_MUL,
@@ -134,9 +134,9 @@ enum NodeMix {
NODE_MIX_COLOR,
NODE_MIX_SOFT,
NODE_MIX_LINEAR
-};
+} NodeMix;
-enum NodeMath {
+typedef enum NodeMath {
NODE_MATH_ADD,
NODE_MATH_SUBTRACT,
NODE_MATH_MULTIPLY,
@@ -154,24 +154,24 @@ enum NodeMath {
NODE_MATH_ROUND,
NODE_MATH_LESS_THAN,
NODE_MATH_GREATER_THAN
-};
+} NodeMath;
-enum NodeVectorMath {
+typedef enum NodeVectorMath {
NODE_VECTOR_MATH_ADD,
NODE_VECTOR_MATH_SUBTRACT,
NODE_VECTOR_MATH_AVERAGE,
NODE_VECTOR_MATH_DOT_PRODUCT,
NODE_VECTOR_MATH_CROSS_PRODUCT,
NODE_VECTOR_MATH_NORMALIZE
-};
+} NodeVectorMath;
-enum NodeConvert {
+typedef enum NodeConvert {
NODE_CONVERT_FV,
NODE_CONVERT_CF,
NODE_CONVERT_VF
-};
+} NodeConvert;
-enum NodeDistanceMetric {
+typedef enum NodeDistanceMetric {
NODE_VORONOI_DISTANCE_SQUARED,
NODE_VORONOI_ACTUAL_DISTANCE,
NODE_VORONOI_MANHATTAN,
@@ -179,9 +179,9 @@ enum NodeDistanceMetric {
NODE_VORONOI_MINKOVSKY_H,
NODE_VORONOI_MINKOVSKY_4,
NODE_VORONOI_MINKOVSKY
-};
+} NodeDistanceMetric;
-enum NodeNoiseBasis {
+typedef enum NodeNoiseBasis {
NODE_NOISE_PERLIN,
NODE_NOISE_VORONOI_F1,
NODE_NOISE_VORONOI_F2,
@@ -190,30 +190,30 @@ enum NodeNoiseBasis {
NODE_NOISE_VORONOI_F2_F1,
NODE_NOISE_VORONOI_CRACKLE,
NODE_NOISE_CELL_NOISE
-};
+} NodeNoiseBasis;
-enum NodeWaveType {
+typedef enum NodeWaveType {
NODE_WAVE_SINE,
NODE_WAVE_SAW,
NODE_WAVE_TRI
-};
+} NodeWaveType;
-enum NodeMusgraveType {
+typedef enum NodeMusgraveType {
NODE_MUSGRAVE_MULTIFRACTAL,
NODE_MUSGRAVE_FBM,
NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
NODE_MUSGRAVE_HETERO_TERRAIN
-};
+} NodeMusgraveType;
-enum NodeWoodType {
+typedef enum NodeWoodType {
NODE_WOOD_BANDS,
NODE_WOOD_RINGS,
NODE_WOOD_BAND_NOISE,
NODE_WOOD_RING_NOISE
-};
+} NodeWoodType;
-enum NodeBlendType {
+typedef enum NodeBlendType {
NODE_BLEND_LINEAR,
NODE_BLEND_QUADRATIC,
NODE_BLEND_EASING,
@@ -221,37 +221,37 @@ enum NodeBlendType {
NODE_BLEND_RADIAL,
NODE_BLEND_QUADRATIC_SPHERE,
NODE_BLEND_SPHERICAL
-};
+} NodeBlendType;
-enum NodeBlendAxis {
+typedef enum NodeBlendAxis {
NODE_BLEND_HORIZONTAL,
NODE_BLEND_VERTICAL
-};
+} NodeBlendAxis;
-enum NodeMarbleType {
+typedef enum NodeMarbleType {
NODE_MARBLE_SOFT,
NODE_MARBLE_SHARP,
NODE_MARBLE_SHARPER
-};
+} NodeMarbleType;
-enum NodeStucciType {
+typedef enum NodeStucciType {
NODE_STUCCI_PLASTIC,
NODE_STUCCI_WALL_IN,
NODE_STUCCI_WALL_OUT
-};
+} NodeStucciType;
-enum NodeVoronoiColoring {
+typedef enum NodeVoronoiColoring {
NODE_VORONOI_INTENSITY,
NODE_VORONOI_POSITION,
NODE_VORONOI_POSITION_OUTLINE,
NODE_VORONOI_POSITION_OUTLINE_INTENSITY
-};
+} NodeVoronoiColoring;
-enum ShaderType {
+typedef enum ShaderType {
SHADER_TYPE_SURFACE,
SHADER_TYPE_VOLUME,
SHADER_TYPE_DISPLACEMENT
-};
+} ShaderType;
/* Closure */
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 60b738bfc51..a11a5c7c2a7 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -40,6 +40,8 @@ __device float color_scene_linear_to_srgb(float c)
return 1.055f * pow(c, 1.0f/2.4f) - 0.055f;
}
+#ifndef __KERNEL_OPENCL__
+
__device float3 color_srgb_to_scene_linear(float3 c)
{
return make_float3(
@@ -56,6 +58,8 @@ __device float3 color_scene_linear_to_srgb(float3 c)
color_scene_linear_to_srgb(c.z));
}
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_COLOR_H__ */
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 3475e309af5..e6dd00fd86b 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -506,13 +506,13 @@ __device_inline float3 fabs(float3 a)
return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
}
-__device_inline float3 as_float3(const float4& a)
+#endif
+
+__device_inline float3 float4_to_float3(const float4 a)
{
return make_float3(a.x, a.y, a.z);
}
-#endif
-
#ifndef __KERNEL_GPU__
__device_inline void print_float3(const char *label, const float3& a)