/* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __KERNEL_COMPAT_CUDA_H__ #define __KERNEL_COMPAT_CUDA_H__ #define __KERNEL_GPU__ #define __KERNEL_CUDA__ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END /* Selective nodes compilation. */ #ifndef __NODES_MAX_GROUP__ # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX #endif #ifndef __NODES_FEATURES__ # define __NODES_FEATURES__ NODE_FEATURE_ALL #endif #include #include #include #include /* Qualifier wrappers for different names on different devices */ #define ccl_device __device__ __inline__ #if __CUDA_ARCH__ < 300 # define ccl_device_inline __device__ __inline__ # define ccl_device_forceinline __device__ __forceinline__ #elif __CUDA_ARCH__ < 500 # define ccl_device_inline __device__ __forceinline__ # define ccl_device_forceinline __device__ __forceinline__ #else # define ccl_device_inline __device__ __inline__ # define ccl_device_forceinline __device__ __forceinline__ #endif #define ccl_device_noinline __device__ __noinline__ #define ccl_global #define ccl_static_constant __constant__ #define ccl_constant const #define ccl_local __shared__ #define ccl_local_param #define ccl_private #define ccl_may_alias #define ccl_addr_space #define ccl_restrict __restrict__ /* TODO(sergey): In theory we might use references with CUDA, however * performance impact yet to be investigated. */ #define ccl_ref #define ccl_align(n) __align__(n) #define ATTR_FALLTHROUGH #define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH) /* No assert supported for CUDA */ #define kernel_assert(cond) /* Types */ #include "util/util_half.h" #include "util/util_types.h" /* Work item functions */ ccl_device_inline uint ccl_local_id(uint d) { switch(d) { case 0: return threadIdx.x; case 1: return threadIdx.y; case 2: return threadIdx.z; default: return 0; } } #define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d)) ccl_device_inline uint ccl_local_size(uint d) { switch(d) { case 0: return blockDim.x; case 1: return blockDim.y; case 2: return blockDim.z; default: return 0; } } #define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d)) ccl_device_inline uint ccl_group_id(uint d) { switch(d) { case 0: return blockIdx.x; case 1: return blockIdx.y; case 2: return blockIdx.z; default: return 0; } } ccl_device_inline uint ccl_num_groups(uint d) { switch(d) { case 0: return gridDim.x; case 1: return gridDim.y; case 2: return gridDim.z; default: return 0; } } /* Textures */ /* Use arrays for regular data. This is a little slower than textures on Fermi, * but allows for cleaner code and we will stop supporting Fermi soon. */ #define kernel_tex_fetch(t, index) t[(index)] /* On Kepler (6xx) and above, we use Bindless Textures for images. * On Fermi cards (4xx and 5xx), we have to use regular textures. */ #if __CUDA_ARCH__ < 300 typedef texture texture_image_float4; typedef texture texture_image3d_float4; typedef texture texture_image_uchar4; #endif #define kernel_data __data /* Use fast math functions */ #define cosf(x) __cosf(((float)(x))) #define sinf(x) __sinf(((float)(x))) #define powf(x, y) __powf(((float)(x)), ((float)(y))) #define tanf(x) __tanf(((float)(x))) #define logf(x) __logf(((float)(x))) #define expf(x) __expf(((float)(x))) #endif /* __KERNEL_COMPAT_CUDA_H__ */