diff options
author | Campbell Barton <ideasman42@gmail.com> | 2014-10-07 12:22:47 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2014-10-07 12:25:29 +0400 |
commit | b09f7dcaa7b487ada53c8dbaa811deaa1201d163 (patch) | |
tree | 0e6bc07122f11b3c219ea334c87c945ebcf03c28 /intern | |
parent | 3d00b8dea388788a52d116704ed06b5e29f920df (diff) | |
parent | 5e809c45edf253e828813b417770889f83a95bb6 (diff) |
Merge branch 'master' into dyntopo_holes
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 5 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_opencl.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/SConscript | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cpu.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_opencl.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_debug.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_volume.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/SConscript | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/osl_globals.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/osl_shader.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/render/shader.h | 8 | ||||
-rw-r--r-- | intern/cycles/render/tile.cpp | 4 | ||||
-rw-r--r-- | intern/ghost/intern/GHOST_SystemCocoa.mm | 2 |
17 files changed, 56 insertions, 49 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 4623764d210..c9b8a5b726b 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -17,6 +17,11 @@ #include <stdlib.h> #include <string.h> +/* So ImathMath is included before our kernel_cpu_compat. */ +#ifdef WITH_OSL +# include <OSL/oslexec.h> +#endif + #include "device.h" #include "device_intern.h" diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index bd265f0fdad..44be7779891 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -278,6 +278,10 @@ public: if(experimental) command += " -D__KERNEL_CUDA_EXPERIMENTAL__"; +#ifdef WITH_CYCLES_DEBUG + command += " -D__KERNEL_DEBUG__"; +#endif + printf("%s\n", command.c_str()); if(system(command.c_str()) == -1) { diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index d950d084cd4..58b2bcafb82 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -102,7 +102,11 @@ static string opencl_kernel_build_options(const string& platform, const string * if(opencl_kernel_use_debug()) build_options += "-D__KERNEL_OPENCL_DEBUG__ "; - + +#ifdef WITH_CYCLES_DEBUG + build_options += "-D__KERNEL_DEBUG__ "; +#endif + return build_options; } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index c5ea3abc567..c521e1383a4 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -170,6 +170,12 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubin kernel_${arch}.cubin) endif() + if(WITH_CYCLES_DEBUG) + set(cuda_debug_flags "-D__KERNEL_DEBUG__") + else() + set(cuda_debug_flags "") + endif() + set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") set(cuda_math_flags "--use_fast_math") @@ -185,6 +191,7 @@ if(WITH_CYCLES_CUDA_BINARIES) ${cuda_version_flags} 
${cuda_math_flags} ${cuda_extra_flags} + ${cuda_debug_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= @@ -197,6 +204,7 @@ if(WITH_CYCLES_CUDA_BINARIES) list(APPEND cuda_cubins ${cuda_cubin}) unset(cuda_extra_flags) + unset(cuda_debug_flags) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 5a9e57c5342..c0d969e24ae 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -79,6 +79,9 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC" nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, geom_dir, closure_dir) + if env['WITH_BF_CYCLES_DEBUG']: + nvcc_flags += " -D__KERNEL_DEBUG__" + # dependencies dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index c2aab93c87b..25531843993 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -25,6 +25,13 @@ #include "util_half.h" #include "util_types.h" +/* On 64-bit Linux, single-precision expf() is really slow compared to the + * double-precision version, even with float<->double conversion involved. 
+ */ +#if !defined(__KERNEL_GPU__) && defined(__linux__) && defined(__x86_64__) +# define expf(x) ((float)exp((double)x)) +#endif + CCL_NAMESPACE_BEGIN /* Assertions inside the kernel only work for the CPU device, so we wrap it in diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 9e58ebff599..78b3fd012dd 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -24,14 +24,6 @@ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END -#ifdef __KERNEL_OPENCL_AMD__ -#define __CL_NO_FLOAT3__ -#endif - -#ifdef __CL_NO_FLOAT3__ -#define float3 float4 -#endif - #ifdef __CL_NOINLINE__ #define ccl_noinline __attribute__((noinline)) #else @@ -73,11 +65,7 @@ #endif #define make_float2(x, y) ((float2)(x, y)) -#ifdef __CL_NO_FLOAT3__ -#define make_float3(x, y, z) ((float4)(x, y, z, 0.0f)) -#else #define make_float3(x, y, z) ((float3)(x, y, z)) -#endif #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #define make_int2(x, y) ((int2)(x, y)) #define make_int3(x, y, z) ((int3)(x, y, z)) diff --git a/intern/cycles/kernel/kernel_debug.h b/intern/cycles/kernel/kernel_debug.h index 81ce1e76420..bf1bc0e9db8 100644 --- a/intern/cycles/kernel/kernel_debug.h +++ b/intern/cycles/kernel/kernel_debug.h @@ -27,9 +27,12 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, DebugData *debug_data, int sample) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversal_steps, - sample, - debug_data->num_bvh_traversal_steps); + int flag = kernel_data.film.pass_flag; + if(flag & PASS_BVH_TRAVERSAL_STEPS) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversal_steps, + sample, + debug_data->num_bvh_traversal_steps); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 0e07d8a95f8..c03229f0a3a 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ 
-46,7 +46,7 @@ #include "kernel_path_volume.h" #ifdef __KERNEL_DEBUG__ -# include "kernel_debug.h" +#include "kernel_debug.h" #endif CCL_NAMESPACE_BEGIN @@ -373,10 +373,8 @@ ccl_device void kernel_path_subsurface_update_volume_stack(KernelGlobals *kg, Ray volume_ray = *ray; Intersection isect; - const float3 Pend = volume_ray.P + volume_ray.D*volume_ray.t; - while(scene_intersect(kg, &volume_ray, PATH_RAY_ALL_VISIBILITY, - &isect, NULL, 0.0f, 0.0f)) + while(scene_intersect_volume(kg, &volume_ray, &isect)) { ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0); @@ -384,15 +382,7 @@ ccl_device void kernel_path_subsurface_update_volume_stack(KernelGlobals *kg, /* Move ray forward. */ volume_ray.P = ray_offset(sd.P, -sd.Ng); - volume_ray.D = normalize_len(Pend - volume_ray.P, - &volume_ray.t); - - /* TODO(sergey): Find a faster way detecting that ray_offset moved - * us pass through the end point. - */ - if(dot(ray->D, volume_ray.D) < 0.0f) { - break; - } + volume_ray.t -= sd.ray_length; } } #endif diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 43becf14825..e5cd23c0925 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -864,7 +864,7 @@ typedef struct KernelFilm { #ifdef __KERNEL_DEBUG__ int pass_bvh_traversal_steps; - int pad[3]; + int pass_pad3, pass_pad4, pass_pad5; #endif } KernelFilm; diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index dd947a54530..93cb4c120ea 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -976,8 +976,9 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, return; } - const float3 Pend = ray->P + ray->D*ray->t; Ray volume_ray = *ray; + volume_ray.t = FLT_MAX; + int stack_index = 0, enclosed_index = 0; int enclosed_volumes[VOLUME_STACK_SIZE]; @@ -1019,15 +1020,6 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, /* Move ray 
forward. */ volume_ray.P = ray_offset(sd.P, -sd.Ng); - if(volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t); - /* TODO(sergey): Find a faster way detecting that ray_offset moved - * us pass through the end point. - */ - if(dot(ray->D, volume_ray.D) < 0.0f) { - break; - } - } } /* stack_index of 0 means quick checks outside of the kernel gave false * positive, nothing to worry about, just we've wasted quite a few of diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript index 4685bb7753e..d721edbaf6e 100644 --- a/intern/cycles/kernel/osl/SConscript +++ b/intern/cycles/kernel/osl/SConscript @@ -43,6 +43,9 @@ defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {') defs.append('CCL_NAMESPACE_END=}') defs.append('WITH_OSL') +if env['WITH_BF_CYCLES_DEBUG']: + defs.append('WITH_CYCLES_DEBUG') + if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'): cxxflags.append('-DBOOST_NO_RTTI -DBOOST_NO_TYPEID /fp:fast'.split()) incs.append(env['BF_PTHREADS_INC']) diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h index 5a658d8244a..9c3134e41c9 100644 --- a/intern/cycles/kernel/osl/osl_globals.h +++ b/intern/cycles/kernel/osl/osl_globals.h @@ -20,7 +20,6 @@ #ifdef WITH_OSL #include <OSL/oslexec.h> -#include <cmath> #include "util_map.h" #include "util_param.h" diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 48498116874..ca0c2cc4415 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -14,6 +14,8 @@ * limitations under the License */ +#include <OSL/oslexec.h> + #include "kernel_compat_cpu.h" #include "kernel_montecarlo.h" #include "kernel_types.h" @@ -34,7 +36,6 @@ #include "attribute.h" -#include <OSL/oslexec.h> CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index 0ed6d2ddf01..368496fd188 100644 --- 
a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -17,6 +17,10 @@ #ifndef __SHADER_H__ #define __SHADER_H__ +#ifdef WITH_OSL +# include <OSL/oslexec.h> +#endif + #include "attribute.h" #include "kernel_types.h" @@ -25,10 +29,6 @@ #include "util_string.h" #include "util_types.h" -#ifdef WITH_OSL -#include <OSL/oslexec.h> -#endif - CCL_NAMESPACE_BEGIN class Device; diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index d6094a4fa0a..e37d8e5f8a1 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -200,8 +200,8 @@ list<Tile>::iterator TileManager::next_background_tile(int device, TileOrder til switch (tile_order) { case TILE_CENTER: - distx = centx - (cur_tile.x + cur_tile.w); - disty = centy - (cur_tile.y + cur_tile.h); + distx = centx - (cur_tile.x + (cur_tile.w / 2)); + disty = centy - (cur_tile.y + (cur_tile.h / 2)); distx = (int64_t)sqrt((double)(distx * distx + disty * disty)); break; case TILE_RIGHT_TO_LEFT: diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm index 7b08c46b65d..ccbac91f0b4 100644 --- a/intern/ghost/intern/GHOST_SystemCocoa.mm +++ b/intern/ghost/intern/GHOST_SystemCocoa.mm @@ -740,7 +740,7 @@ bool GHOST_SystemCocoa::processEvents(bool waitForEvent) // For some reason NSApp is swallowing the key up events when modifier // key is pressed, even if there seems to be no apparent reason to do // so, as a workaround we always handle these up events. - if ([event type] == NSKeyUp && (([event modifierFlags] & NSCommandKeyMask) || ([event modifierFlags] & NSAlternateKeyMask))) + if ([event type] == NSKeyUp && ([event modifierFlags] & (NSCommandKeyMask | NSAlternateKeyMask))) handleKeyEvent(event); [NSApp sendEvent:event]; |