From 47853bf6f6fa7ab4dc523fe255a8253b7ae9f914 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 22 Nov 2011 13:15:19 +0000 Subject: Cycles: OpenCL tweaks * Reduce kernel arguments size, helps compile for apple nvidia. * Fix use of unitialized variable in displace kernel. * Use build flags in opencl kernel md5 hash. * Reorganize code for kernel feature #defines a bit. --- intern/cycles/device/device_opencl.cpp | 35 +++++++++++++------------ intern/cycles/kernel/kernel.cl | 12 +++------ intern/cycles/kernel/kernel_camera.h | 4 +-- intern/cycles/kernel/kernel_compat_cpu.h | 6 +++-- intern/cycles/kernel/kernel_compat_cuda.h | 2 +- intern/cycles/kernel/kernel_compat_opencl.h | 2 +- intern/cycles/kernel/kernel_globals.h | 3 +-- intern/cycles/kernel/kernel_shader.h | 2 +- intern/cycles/kernel/kernel_types.h | 40 +++++++++++++++++++---------- intern/cycles/render/filter.cpp | 4 ++- 10 files changed, 62 insertions(+), 48 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index c96d4617ffb..f75928c1b80 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -260,12 +260,9 @@ public: return true; } - bool build_kernel(const string& kernel_path) + string kernel_build_options() { - string build_options = ""; - - build_options += "-I " + kernel_path + ""; /* todo: escape path */ - build_options += " -cl-fast-relaxed-math "; + string build_options = " -cl-fast-relaxed-math "; /* Full Shading only on NVIDIA cards at the moment */ char vendor[256]; @@ -273,14 +270,19 @@ public: clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL); string name = vendor; - if (name == "NVIDIA CUDA") { - build_options += "-D __SVM__ "; - build_options += "-D __EMISSION__ "; - build_options += "-D __TEXTURES__ "; - build_options += "-D __HOLDOUT__ "; - build_options += "-D __MULTI_CLOSURE__ "; - } + if(name == "NVIDIA CUDA") + build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ "; + return build_options; + } + + bool build_kernel(const string& kernel_path) + { + string build_options = ""; + + build_options += "-I " + kernel_path + ""; /* todo: escape path, but it doesn't get parsed correct? */ + build_options += kernel_build_options(); + ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL); if(ciErr != CL_SUCCESS) { @@ -344,6 +346,9 @@ public: md5.append((uint8_t*)name, strlen(name)); md5.append((uint8_t*)driver, strlen(driver)); + string options = kernel_build_options(); + md5.append((uint8_t*)options.c_str(), options.size()); + return md5.get_hex(); } @@ -563,24 +568,20 @@ public: cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name) { cl_mem ptr; - cl_int size, err = 0; + cl_int err = 0; if(mem_map.find(name) != mem_map.end()) { device_memory *mem = mem_map[name]; ptr = CL_MEM_PTR(mem->device_pointer); - size = mem->data_width; } else { /* work around NULL not working, even though the spec says otherwise */ ptr = CL_MEM_PTR(null_mem); - size = 1; } err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr); opencl_assert(err); - err |= clSetKernelArg(kernel, (*narg)++, sizeof(size), (void*)&size); - opencl_assert(err); return err; } diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index 48bee8eef97..68ca24af58e 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -33,8 +33,7 @@ __kernel void kernel_ocl_path_trace( __global uint *rng_state, #define KERNEL_TEX(type, ttype, name) \ - __global type *name, \ - int name##_width, + __global type *name, #include "kernel_textures.h" int sample, @@ -45,8 +44,7 @@ __kernel void kernel_ocl_path_trace( kg->data = data; #define KERNEL_TEX(type, ttype, name) \ - kg->name = name; \ - kg->name##_width = name##_width; + kg->name = name; #include "kernel_textures.h" int x = sx + get_global_id(0); @@ -62,8 +60,7 @@ __kernel void kernel_ocl_tonemap( __global float4 *buffer, #define KERNEL_TEX(type, ttype, name) \ - __global type *name, \ - int name##_width, + __global type *name, #include "kernel_textures.h" int sample, int resolution, @@ -74,8 +71,7 @@ __kernel void kernel_ocl_tonemap( kg->data = data; #define KERNEL_TEX(type, ttype, name) \ - kg->name = name; \ - kg->name##_width = name##_width; + kg->name = name; #include "kernel_textures.h" int x = sx + get_global_id(0); diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index c2828c20eee..9cdc2f1f865 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -127,8 +127,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa __device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v, float lens_u, float lens_v, Ray *ray) { /* pixel filter */ - float raster_x = x + kernel_tex_interp(__filter_table, filter_u); - float raster_y = y + kernel_tex_interp(__filter_table, filter_v); + float raster_x = x + kernel_tex_interp(__filter_table, filter_u, FILTER_TABLE_SIZE); + float raster_y = y + kernel_tex_interp(__filter_table, filter_v, FILTER_TABLE_SIZE); /* motion blur */ //ray->time = lerp(time_t, kernel_data.cam.shutter_open, kernel_data.cam.shutter_close); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index fd96148968f..783ae519845 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -55,8 +55,10 @@ template struct texture { return ((__m128i*)data)[index]; }*/ - float interp(float x) + float interp(float x, int size) { + kernel_assert(size == width); + x = clamp(x, 0.0f, 1.0f)*width; int index = min((int)x, width-1); @@ -151,7 +153,7 @@ typedef texture_image texture_image_uchar4; #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index)) #define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index)) #define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index)) -#define kernel_tex_interp(tex, t) (kg->tex.interp(t)) +#define kernel_tex_interp(tex, t, size) (kg->tex.interp(t, size)) #define kernel_tex_image_interp(tex, x, y) (kg->tex.interp(x, y)) #define kernel_data (kg->__data) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 72aef463cab..40129a2f68f 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -55,7 +55,7 @@ typedef texture texture_image_uchar4; /* Macros to handle different memory storage on different devices */ #define kernel_tex_fetch(t, index) tex1Dfetch(t, index) -#define kernel_tex_interp(t, x) tex1D(t, x) +#define kernel_tex_interp(t, x, size) tex1D(t, x) #define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) #define kernel_data __data diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 287bf320881..5515966807b 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -100,7 +100,7 @@ __device float kernel_tex_interp_(__global float *data, int width, float x) /* data lookup defines */ #define kernel_data (*kg->data) -#define kernel_tex_interp(t, x) kernel_tex_interp_(kg->t, kg->t##_width, x) +#define kernel_tex_interp(t, x, size) kernel_tex_interp_(kg->t, size, x) #define kernel_tex_fetch(t, index) kg->t[index] /* define NULL */ diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index ea866221487..1f2fc97e685 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -77,8 +77,7 @@ typedef struct KernelGlobals { __constant KernelData *data; #define KERNEL_TEX(type, ttype, name) \ - __global type *name; \ - int name##_width; + __global type *name; #include "kernel_textures.h" } KernelGlobals; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 1647504207a..570e0721268 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -226,7 +226,7 @@ __device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, Ng = triangle_normal_MT(kg, prim, &shader); /* force smooth shading for displacement */ - sd->shader |= SHADER_SMOOTH_NORMAL; + shader |= SHADER_SMOOTH_NORMAL; /* watch out: no instance transform currently */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 46cdcd1151e..d9bd645b16d 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -25,9 +25,30 @@ CCL_NAMESPACE_BEGIN -#define OBJECT_SIZE 16 -#define LIGHT_SIZE 4 +/* constants */ +#define OBJECT_SIZE 16 +#define LIGHT_SIZE 4 +#define FILTER_TABLE_SIZE 256 +/* device capabilities */ +#ifdef __KERNEL_CPU__ +#define __KERNEL_SHADING__ +#define __KERNEL_ADV_SHADING__ +#endif + +#ifdef __KERNEL_CUDA__ +#define __KERNEL_SHADING__ +#if __CUDA_ARCH__ >= 200 +#define __KERNEL_ADV_SHADING__ +#endif +#endif + +#ifdef __KERNEL_OPENCL__ +//#define __KERNEL_SHADING__ +//#define __KERNEL_ADV_SHADING__ +#endif + +/* kernel features */ #define __SOBOL__ #define __INSTANCING__ #define __DPDU__ @@ -39,27 +60,20 @@ CCL_NAMESPACE_BEGIN #define __CAMERA_CLIPPING__ #define __INTERSECTION_REFINE__ -#ifndef __KERNEL_OPENCL__ +#ifdef __KERNEL_SHADING__ #define __SVM__ #define __EMISSION__ #define __TEXTURES__ #define __HOLDOUT__ -//#define __MULTI_LIGHT__ #endif -#ifdef __KERNEL_CPU__ +#ifdef __KERNEL_ADV_SHADING__ #define __MULTI_CLOSURE__ #define __TRANSPARENT_SHADOWS__ -//#define __OSL__ -#endif - -#ifdef __KERNEL_CUDA__ -#if __CUDA_ARCH__ >= 200 -#define __MULTI_CLOSURE__ -#define __TRANSPARENT_SHADOWS__ -#endif #endif +//#define __MULTI_LIGHT__ +//#define __OSL__ //#define __SOBOL_FULL_SCREEN__ //#define __MODIFY_TP__ //#define __QBVH__ diff --git a/intern/cycles/render/filter.cpp b/intern/cycles/render/filter.cpp index 4925521e4a5..c000f1a0636 100644 --- a/intern/cycles/render/filter.cpp +++ b/intern/cycles/render/filter.cpp @@ -21,6 +21,8 @@ #include "filter.h" #include "scene.h" +#include "kernel_types.h" + #include "util_algorithm.h" #include "util_debug.h" #include "util_math.h" @@ -51,7 +53,7 @@ static float filter_func_gaussian(float v, float width) static vector filter_table(FilterType type, float width) { - const int filter_table_size = 256; + const int filter_table_size = FILTER_TABLE_SIZE; vector filter_table_cdf(filter_table_size+1); vector filter_table(filter_table_size+1); float (*filter_func)(float, float) = NULL; -- cgit v1.2.3