diff options
author | Campbell Barton <ideasman42@gmail.com> | 2018-02-18 14:33:05 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2018-02-18 14:33:05 +0300 |
commit | 2bc952fdb6e1474e9e568224a37bcf5cff874aaf (patch) | |
tree | 8d7c3a99896415392035ba350a606d350b6d5e37 /intern/cycles | |
parent | 5d3f679013bbbb9f0c7aae47b5653c54266cf7ca (diff) | |
parent | 7ff3cd26932cbc93068eea4dc7438442216e4ee1 (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/app/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/device/device.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 192 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve_intersect.h | 32 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cuda.h | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_globals.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_textures.h | 109 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_config.h | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h | 121 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm.h | 3 | ||||
-rw-r--r-- | intern/cycles/render/image.cpp | 41 | ||||
-rw-r--r-- | intern/cycles/render/image.h | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_math_intersect.h | 7 | ||||
-rw-r--r-- | intern/cycles/util/util_texture.h | 23 |
17 files changed, 108 insertions, 482 deletions
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index a8e0297aeaa..d1f86a5fe7d 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -126,7 +126,8 @@ if(WITH_CYCLES_CUBIN_COMPILER) # though we are building 32 bit blender a 64 bit cubin_cc will have # to be build to compile the cubins. if(MSVC AND NOT CMAKE_CL_64) - Message("cycles_cubin_cc not supported on x86") + message("Building with CUDA not supported on 32 bit, skipped") + set(WITH_CYCLES_CUDA_BINARIES OFF) else() set(SRC cycles_cubin_cc.cpp diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 56649a7e363..aa2a10a3dd9 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -502,7 +502,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th info.description = "Multi Device"; info.num = 0; - info.has_fermi_limits = false; info.has_half_images = true; info.has_volume_decoupled = true; info.bvh_layout_mask = BVH_LAYOUT_ALL; @@ -538,8 +537,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th } /* Accumulate device info. */ - info.has_fermi_limits = info.has_fermi_limits || - device.has_fermi_limits; info.has_half_images &= device.has_half_images; info.has_volume_decoupled &= device.has_volume_decoupled; info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index be4b9a7e972..31deba2d796 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -56,7 +56,6 @@ public: int num; bool display_device; /* GPU is used as a display device. */ bool advanced_shading; /* Supports full shading system. */ - bool has_fermi_limits; /* Fixed number of textures limit. */ bool has_half_images; /* Support half-float textures. */ bool has_volume_decoupled; /* Decoupled volume shading. */ BVHLayoutMask bvh_layout_mask; /* Bitmask of supported BVH layouts. */ @@ -73,7 +72,6 @@ public: cpu_threads = 0; display_device = false; advanced_shading = true; - has_fermi_limits = false; has_half_images = false; has_volume_decoupled = false; bvh_layout_mask = BVH_LAYOUT_NONE; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 64295838f0c..54d4c731ed4 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -309,9 +309,7 @@ public: delete split_kernel; - if(!info.has_fermi_limits) { - texture_info.free(); - } + texture_info.free(); cuda_assert(cuCtxDestroy(cuContext)); } @@ -322,9 +320,9 @@ public: cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); - /* We only support sm_20 and above */ - if(major < 2) { - cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); + /* We only support sm_30 and above */ + if(major < 3) { + cuda_error_message(string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.", major, minor)); return false; } @@ -462,9 +460,9 @@ public: #ifdef _WIN32 if(have_precompiled_kernels()) { - if(major < 2) { + if(major < 3) { cuda_error_message(string_printf( - "CUDA device requires compute capability 2.0 or up, " + "CUDA device requires compute capability 3.0 or up, " "found %d.%d. Your GPU is not supported.", major, minor)); } @@ -680,7 +678,7 @@ public: void load_texture_info() { - if(!info.has_fermi_limits && need_texture_info) { + if(need_texture_info) { texture_info.copy_to_device(); need_texture_info = false; } @@ -1018,9 +1016,6 @@ public: { CUDAContextScope scope(this); - /* Check if we are on sm_30 or above, for bindless textures. */ - bool has_fermi_limits = info.has_fermi_limits; - /* General variables for both architectures */ string bind_name = mem.name; size_t dsize = datatype_size(mem.data_type); @@ -1074,27 +1069,6 @@ public: } /* Image Texture Storage */ - CUtexref texref = NULL; - - if(has_fermi_limits) { - if(mem.data_depth > 1) { - /* Kernel uses different bind names for 2d and 3d float textures, - * so we have to adjust couple of things here. - */ - vector<string> tokens; - string_split(tokens, mem.name, "_"); - bind_name = string_printf("__tex_image_%s_3d_%s", - tokens[2].c_str(), - tokens[3].c_str()); - } - - cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str())); - - if(!texref) { - return; - } - } - CUarray_format_enum format; switch(mem.data_type) { case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; @@ -1187,97 +1161,68 @@ public: cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); } - if(!has_fermi_limits) { - /* Kepler+, bindless textures. */ - int flat_slot = 0; - if(string_startswith(mem.name, "__tex_image")) { - int pos = string(mem.name).rfind("_"); - flat_slot = atoi(mem.name + pos + 1); - } - else { - assert(0); - } - - CUDA_RESOURCE_DESC resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - - if(array_3d) { - resDesc.resType = CU_RESOURCE_TYPE_ARRAY; - resDesc.res.array.hArray = array_3d; - resDesc.flags = 0; - } - else if(mem.data_height > 0) { - resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; - resDesc.res.pitch2D.devPtr = mem.device_pointer; - resDesc.res.pitch2D.format = format; - resDesc.res.pitch2D.numChannels = mem.data_elements; - resDesc.res.pitch2D.height = mem.data_height; - resDesc.res.pitch2D.width = mem.data_width; - resDesc.res.pitch2D.pitchInBytes = dst_pitch; - } - else { - resDesc.resType = CU_RESOURCE_TYPE_LINEAR; - resDesc.res.linear.devPtr = mem.device_pointer; - resDesc.res.linear.format = format; - resDesc.res.linear.numChannels = mem.data_elements; - resDesc.res.linear.sizeInBytes = mem.device_size; - } - - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = address_mode; - texDesc.addressMode[1] = address_mode; - texDesc.addressMode[2] = address_mode; - texDesc.filterMode = filter_mode; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - - cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); - - /* Resize once */ - if(flat_slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount - * of re-allocations. */ - texture_info.resize(flat_slot + 128); - } - - /* Set Mapping and tag that we need to (re-)upload to device */ - TextureInfo& info = texture_info[flat_slot]; - info.data = (uint64_t)cmem->texobject; - info.cl_buffer = 0; - info.interpolation = mem.interpolation; - info.extension = mem.extension; - info.width = mem.data_width; - info.height = mem.data_height; - info.depth = mem.data_depth; - need_texture_info = true; + /* Kepler+, bindless textures. */ + int flat_slot = 0; + if(string_startswith(mem.name, "__tex_image")) { + int pos = string(mem.name).rfind("_"); + flat_slot = atoi(mem.name + pos + 1); } else { - /* Fermi, fixed texture slots. */ - if(array_3d) { - cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT)); - } - else if(mem.data_height > 0) { - CUDA_ARRAY_DESCRIPTOR array_desc; - array_desc.Format = format; - array_desc.Height = mem.data_height; - array_desc.Width = mem.data_width; - array_desc.NumChannels = mem.data_elements; - cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch)); - } - else { - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)); - } + assert(0); + } - /* Attach to texture reference. */ - cuda_assert(cuTexRefSetFilterMode(texref, filter_mode)); - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)); - cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)); - cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode)); - cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode)); - if(mem.data_depth > 1) { - cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode)); - } + CUDA_RESOURCE_DESC resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + + if(array_3d) { + resDesc.resType = CU_RESOURCE_TYPE_ARRAY; + resDesc.res.array.hArray = array_3d; + resDesc.flags = 0; + } + else if(mem.data_height > 0) { + resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; + resDesc.res.pitch2D.devPtr = mem.device_pointer; + resDesc.res.pitch2D.format = format; + resDesc.res.pitch2D.numChannels = mem.data_elements; + resDesc.res.pitch2D.height = mem.data_height; + resDesc.res.pitch2D.width = mem.data_width; + resDesc.res.pitch2D.pitchInBytes = dst_pitch; } + else { + resDesc.resType = CU_RESOURCE_TYPE_LINEAR; + resDesc.res.linear.devPtr = mem.device_pointer; + resDesc.res.linear.format = format; + resDesc.res.linear.numChannels = mem.data_elements; + resDesc.res.linear.sizeInBytes = mem.device_size; + } + + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = address_mode; + texDesc.addressMode[1] = address_mode; + texDesc.addressMode[2] = address_mode; + texDesc.filterMode = filter_mode; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); + + /* Resize once */ + if(flat_slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. */ + texture_info.resize(flat_slot + 128); + } + + /* Set Mapping and tag that we need to (re-)upload to device */ + TextureInfo& info = texture_info[flat_slot]; + info.data = (uint64_t)cmem->texobject; + info.cl_buffer = 0; + info.interpolation = mem.interpolation; + info.extension = mem.extension; + info.width = mem.data_width; + info.height = mem.data_height; + info.depth = mem.data_depth; + need_texture_info = true; } void tex_free(device_memory& mem) @@ -2550,9 +2495,9 @@ void device_cuda_info(vector<DeviceInfo>& devices) int major; cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num); - if(major < 2) { + if(major < 3) { VLOG(1) << "Ignoring device \"" << name - << "\", compute capability is too low."; + << "\", this graphics card is no longer supported."; continue; } @@ -2562,8 +2507,7 @@ void device_cuda_info(vector<DeviceInfo>& devices) info.description = string(name); info.num = num; - info.advanced_shading = (major >= 2); - info.has_fermi_limits = !(major >= 3); + info.advanced_shading = (major >= 3); info.has_half_images = (major >= 3); info.has_volume_decoupled = false; info.bvh_layout_mask = BVH_LAYOUT_BVH2; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index d981b67559e..50ea03a1f8f 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -316,12 +316,8 @@ set(SRC_SPLIT_HEADERS # CUDA module if(WITH_CYCLES_CUDA_BINARIES) - # 32 bit or 64 bit - if(CUDA_64_BIT_DEVICE_CODE) - set(CUDA_BITS 64) - else() - set(CUDA_BITS 32) - endif() + # 64 bit only + set(CUDA_BITS 64) # CUDA version execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) @@ -420,17 +416,17 @@ if(WITH_CYCLES_CUDA_BINARIES) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - if(CUDA_VERSION GREATER "89" AND ${arch} MATCHES "sm_2.") - message(STATUS "CUDA binaries for ${arch} disabled, not supported by CUDA 9.") + if(${arch} MATCHES "sm_2.") + message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") else() # Compile regular kernel CYCLES_CUDA_KERNEL_ADD(${arch} filter "" "${cuda_filter_sources}" FALSE) CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE) + endif() - if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) - # Compile split kernel - CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE) - endif() + if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) + # Compile split kernel + CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D __SPLIT__" ${cuda_sources} FALSE) endif() endforeach() diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index e9a149ea1ab..faf3e3cdf2b 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -18,12 +18,6 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -#if defined(__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300) -# define ccl_device_curveintersect ccl_device -#else -# define ccl_device_curveintersect ccl_device_forceinline -#endif - #ifdef __KERNEL_SSE2__ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a) { @@ -32,7 +26,7 @@ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a) #endif /* On CPU pass P and dir by reference to aligned vector. */ -ccl_device_curveintersect bool cardinal_curve_intersect( +ccl_device_forceinline bool cardinal_curve_intersect( KernelGlobals *kg, Intersection *isect, const float3 ccl_ref P, @@ -505,18 +499,18 @@ ccl_device_curveintersect bool cardinal_curve_intersect( return hit; } -ccl_device_curveintersect bool curve_intersect(KernelGlobals *kg, - Intersection *isect, - float3 P, - float3 direction, - uint visibility, - int object, - int curveAddr, - float time, - int type, - uint *lcg_state, - float difl, - float extmax) +ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, + Intersection *isect, + float3 P, + float3 direction, + uint visibility, + int object, + int curveAddr, + float time, + int type, + uint *lcg_state, + float difl, + float extmax) { /* define few macros to minimize code duplication for SSE */ #ifndef __KERNEL_SSE2__ diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 900f7fe6a2c..9bd7a572f5f 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -50,10 +50,7 @@ __device__ half __float2half(const float f) /* Qualifier wrappers for different names on different devices */ #define ccl_device __device__ __inline__ -#if __CUDA_ARCH__ < 300 -# define ccl_device_inline __device__ __inline__ -# define ccl_device_forceinline __device__ __forceinline__ -#elif __CUDA_ARCH__ < 500 +#if __CUDA_ARCH__ < 500 # define ccl_device_inline __device__ __forceinline__ # define ccl_device_forceinline __device__ __forceinline__ #else @@ -138,18 +135,9 @@ ccl_device_inline uint ccl_num_groups(uint d) /* Textures */ -/* Use arrays for regular data. This is a little slower than textures on Fermi, - * but allows for cleaner code and we will stop supporting Fermi soon. */ +/* Use arrays for regular data. */ #define kernel_tex_fetch(t, index) t[(index)] -/* On Kepler (6xx) and above, we use Bindless Textures for images. - * On Fermi cards (4xx and 5xx), we have to use regular textures. */ -#if __CUDA_ARCH__ < 300 -typedef texture<float4, 2> texture_image_float4; -typedef texture<float4, 3> texture_image3d_float4; -typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; -#endif - #define kernel_data __data /* Use fast math functions */ diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 97d4726407b..74cfacb5bc1 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -47,7 +47,6 @@ struct VolumeStep; typedef struct KernelGlobals { # define KERNEL_TEX(type, name) texture<type> name; -# define KERNEL_IMAGE_TEX(type, ttype, name) # include "kernel/kernel_textures.h" KernelData __data; @@ -93,7 +92,6 @@ typedef struct KernelGlobals { } KernelGlobals; # define KERNEL_TEX(type, name) const __constant__ __device__ type *name; -# define KERNEL_IMAGE_TEX(type, ttype, name) ttype name; # include "kernel/kernel_textures.h" #endif /* __KERNEL_CUDA__ */ diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 344b2223573..74b659557e5 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -18,10 +18,6 @@ # define KERNEL_TEX(type, name) #endif -#ifndef KERNEL_IMAGE_TEX -# define KERNEL_IMAGE_TEX(type, ttype, name) -#endif - /* bvh */ KERNEL_TEX(float4, __bvh_nodes) KERNEL_TEX(float4, __bvh_leaf_nodes) @@ -78,113 +74,8 @@ KERNEL_TEX(float, __lookup_table) /* sobol */ KERNEL_TEX(uint, __sobol_directions) -#if !defined(__KERNEL_CUDA__) || __CUDA_ARCH__ >= 300 /* image textures */ KERNEL_TEX(TextureInfo, __texture_info) -#else -/* full-float image */ -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_000) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_008) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_016) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_024) -KERNEL_IMAGE_TEX(float4, texture_image_float4, __tex_image_float4_032) - -KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_000) -KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_008) -KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_016) -KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_024) -KERNEL_IMAGE_TEX(float4, texture_image3d_float4, __tex_image_float4_3d_032) - -/* image - * These texture names are encoded to their flattened slots as - * ImageManager::type_index_to_flattened_slot() returns them. */ -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_001) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_009) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_017) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_025) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_033) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_041) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_049) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_057) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_065) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_073) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_081) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_089) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_097) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_105) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_113) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_121) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_129) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_137) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_145) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_153) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_161) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_169) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_177) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_185) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_193) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_201) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_209) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_217) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_225) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_233) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_241) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_249) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_257) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_265) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_273) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_281) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_289) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_297) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_305) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_313) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_321) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_329) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_337) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_345) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_353) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_361) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_369) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_377) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_385) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_393) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_401) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_409) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_417) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_425) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_433) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_441) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_449) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_457) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_465) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_473) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_481) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_489) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_497) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_505) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_513) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_521) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_529) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_537) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_545) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_553) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_561) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_569) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_577) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_585) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_593) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_601) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_609) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_617) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_625) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_633) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_641) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_649) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_657) -KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665) -#endif /* defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 */ #undef KERNEL_TEX -#undef KERNEL_IMAGE_TEX - diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index aa67262f36b..de487f6123f 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -85,7 +85,6 @@ void kernel_tex_copy(KernelGlobals *kg, kg->tname.data = (type*)mem; \ kg->tname.width = size; \ } -#define KERNEL_IMAGE_TEX(type, tname) #include "kernel/kernel_textures.h" else { assert(0); diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h index 94f59ff38d9..f3d0d721c5c 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -16,20 +16,8 @@ /* device data taken from CUDA occupancy calculator */ -/* 2.0 and 2.1 */ -#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 32 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 - /* 3.0 and 3.5 */ -#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 +#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 # define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 # define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 # define CUDA_BLOCK_MAX_THREADS 1024 diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 5ca07eaeb05..91ad289a858 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -14,10 +14,6 @@ * limitations under the License. */ -#if __CUDA_ARCH__ >= 300 - -/* Kepler */ - /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ ccl_device float cubic_w0(float a) { @@ -191,120 +187,3 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, } } -#else - -/* Fermi */ - -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) -{ - float4 r; - switch(id) { - case 0: r = tex2D(__tex_image_float4_000, x, y); break; - case 8: r = tex2D(__tex_image_float4_008, x, y); break; - case 16: r = tex2D(__tex_image_float4_016, x, y); break; - case 24: r = tex2D(__tex_image_float4_024, x, y); break; - case 32: r = tex2D(__tex_image_float4_032, x, y); break; - case 1: r = tex2D(__tex_image_byte4_001, x, y); break; - case 9: r = tex2D(__tex_image_byte4_009, x, y); break; - case 17: r = tex2D(__tex_image_byte4_017, x, y); break; - case 25: r = tex2D(__tex_image_byte4_025, x, y); break; - case 33: r = tex2D(__tex_image_byte4_033, x, y); break; - case 41: r = tex2D(__tex_image_byte4_041, x, y); break; - case 49: r = tex2D(__tex_image_byte4_049, x, y); break; - case 57: r = tex2D(__tex_image_byte4_057, x, y); break; - case 65: r = tex2D(__tex_image_byte4_065, x, y); break; - case 73: r = tex2D(__tex_image_byte4_073, x, y); break; - case 81: r = tex2D(__tex_image_byte4_081, x, y); break; - case 89: r = tex2D(__tex_image_byte4_089, x, y); break; - case 97: r = tex2D(__tex_image_byte4_097, x, y); break; - case 105: r = tex2D(__tex_image_byte4_105, x, y); break; - case 113: r = tex2D(__tex_image_byte4_113, x, y); break; - case 121: r = tex2D(__tex_image_byte4_121, x, y); break; - case 129: r = tex2D(__tex_image_byte4_129, x, y); break; - case 137: r = tex2D(__tex_image_byte4_137, x, y); break; - case 145: r = tex2D(__tex_image_byte4_145, x, y); break; - case 153: r = tex2D(__tex_image_byte4_153, x, y); break; - case 161: r = tex2D(__tex_image_byte4_161, x, y); break; - case 169: r = tex2D(__tex_image_byte4_169, x, y); break; - case 177: r = tex2D(__tex_image_byte4_177, x, y); break; - case 185: r = tex2D(__tex_image_byte4_185, x, y); break; - case 193: r = tex2D(__tex_image_byte4_193, x, y); break; - case 201: r = tex2D(__tex_image_byte4_201, x, y); break; - case 209: r = tex2D(__tex_image_byte4_209, x, y); break; - case 217: r = tex2D(__tex_image_byte4_217, x, y); break; - case 225: r = tex2D(__tex_image_byte4_225, x, y); break; - case 233: r = tex2D(__tex_image_byte4_233, x, y); break; - case 241: r = tex2D(__tex_image_byte4_241, x, y); break; - case 249: r = tex2D(__tex_image_byte4_249, x, y); break; - case 257: r = tex2D(__tex_image_byte4_257, x, y); break; - case 265: r = tex2D(__tex_image_byte4_265, x, y); break; - case 273: r = tex2D(__tex_image_byte4_273, x, y); break; - case 281: r = tex2D(__tex_image_byte4_281, x, y); break; - case 289: r = tex2D(__tex_image_byte4_289, x, y); break; - case 297: r = tex2D(__tex_image_byte4_297, x, y); break; - case 305: r = tex2D(__tex_image_byte4_305, x, y); break; - case 313: r = tex2D(__tex_image_byte4_313, x, y); break; - case 321: r = tex2D(__tex_image_byte4_321, x, y); break; - case 329: r = tex2D(__tex_image_byte4_329, x, y); break; - case 337: r = tex2D(__tex_image_byte4_337, x, y); break; - case 345: r = tex2D(__tex_image_byte4_345, x, y); break; - case 353: r = tex2D(__tex_image_byte4_353, x, y); break; - case 361: r = tex2D(__tex_image_byte4_361, x, y); break; - case 369: r = tex2D(__tex_image_byte4_369, x, y); break; - case 377: r = tex2D(__tex_image_byte4_377, x, y); break; - case 385: r = tex2D(__tex_image_byte4_385, x, y); break; - case 393: r = tex2D(__tex_image_byte4_393, x, y); break; - case 401: r = tex2D(__tex_image_byte4_401, x, y); break; - case 409: r = tex2D(__tex_image_byte4_409, x, y); break; - case 417: r = tex2D(__tex_image_byte4_417, x, y); break; - case 425: r = tex2D(__tex_image_byte4_425, x, y); break; - case 433: r = tex2D(__tex_image_byte4_433, x, y); break; - case 441: r = tex2D(__tex_image_byte4_441, x, y); break; - case 449: r = tex2D(__tex_image_byte4_449, x, y); break; - case 457: r = tex2D(__tex_image_byte4_457, x, y); break; - case 465: r = tex2D(__tex_image_byte4_465, x, y); break; - case 473: r = tex2D(__tex_image_byte4_473, x, y); break; - case 481: r = tex2D(__tex_image_byte4_481, x, y); break; - case 489: r = tex2D(__tex_image_byte4_489, x, y); break; - case 497: r = tex2D(__tex_image_byte4_497, x, y); break; - case 505: r = tex2D(__tex_image_byte4_505, x, y); break; - case 513: r = tex2D(__tex_image_byte4_513, x, y); break; - case 521: r = tex2D(__tex_image_byte4_521, x, y); break; - case 529: r = tex2D(__tex_image_byte4_529, x, y); break; - case 537: r = tex2D(__tex_image_byte4_537, x, y); break; - case 545: r = tex2D(__tex_image_byte4_545, x, y); break; - case 553: r = tex2D(__tex_image_byte4_553, x, y); break; - case 561: r = tex2D(__tex_image_byte4_561, x, y); break; - case 569: r = tex2D(__tex_image_byte4_569, x, y); break; - case 577: r = tex2D(__tex_image_byte4_577, x, y); break; - case 585: r = tex2D(__tex_image_byte4_585, x, y); break; - case 593: r = tex2D(__tex_image_byte4_593, x, y); break; - case 601: r = tex2D(__tex_image_byte4_601, x, y); break; - case 609: r = tex2D(__tex_image_byte4_609, x, y); break; - case 617: r = tex2D(__tex_image_byte4_617, x, y); break; - case 625: r = tex2D(__tex_image_byte4_625, x, y); break; - case 633: r = tex2D(__tex_image_byte4_633, x, y); break; - case 641: r = tex2D(__tex_image_byte4_641, x, y); break; - case 649: r = tex2D(__tex_image_byte4_649, x, y); break; - case 657: r = tex2D(__tex_image_byte4_657, x, y); break; - case 665: r = tex2D(__tex_image_byte4_665, x, y); break; - default: r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return r; -} - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) -{ - float4 r; - switch(id) { - case 0: r = tex3D(__tex_image_float4_3d_000, x, y, z); break; - case 8: r = tex3D(__tex_image_float4_3d_008, x, y, z); break; - case 16: r = tex3D(__tex_image_float4_3d_016, x, y, z); break; - case 24: r = tex3D(__tex_image_float4_3d_024, x, y, z); break; - case 32: r = tex3D(__tex_image_float4_3d_032, x, y, z); break; - } - return r; -} - -#endif - diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index a8f99d23b7d..fae9f783483 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -30,8 +30,7 @@ * in local memory on the GPU, as it would take too many register and indexes in * ways not known at compile time. This seems the only solution even though it * may be slow, with two positive factors. If the same shader is being executed, - * memory access will be coalesced, and on fermi cards, memory will actually be - * cached. + * memory access will be coalesced and cached. * * The result of shader execution will be a single closure. This means the * closure type, associated label, data and weight. Sampling from multiple diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index feaa17148ee..dbe15a67b9e 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -49,7 +49,6 @@ ImageManager::ImageManager(const DeviceInfo& info) /* Set image limits */ max_num_images = TEX_NUM_MAX; has_half_images = info.has_half_images; - cuda_fermi_limits = info.has_fermi_limits; for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { tex_num_images[type] = 0; @@ -255,7 +254,7 @@ int ImageManager::add_image(const string& filename, /* Check whether it's a float texture. */ is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4); - /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */ + /* No half textures on OpenCL, use full float instead. */ if(!has_half_images) { if(type == IMAGE_DATA_TYPE_HALF4) { type = IMAGE_DATA_TYPE_FLOAT4; @@ -265,15 +264,6 @@ int ImageManager::add_image(const string& filename, } } - if(cuda_fermi_limits) { - if(type == IMAGE_DATA_TYPE_FLOAT) { - type = IMAGE_DATA_TYPE_FLOAT4; - } - else if(type == IMAGE_DATA_TYPE_BYTE) { - type = IMAGE_DATA_TYPE_BYTE4; - } - } - /* Fnd existing image. */ for(slot = 0; slot < images[type].size(); slot++) { img = images[type][slot]; @@ -303,27 +293,16 @@ int ImageManager::add_image(const string& filename, break; } - /* Count if we're over the limit */ - if(cuda_fermi_limits) { - if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA - || tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA) - { - printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n", - name_from_type(type).c_str(), tex_num_images[type], filename.c_str()); - return -1; - } + /* Count if we're over the limit. + * Very unlikely, since max_num_images is insanely big. But better safe than sorry. */ + int tex_count = 0; + for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + tex_count += tex_num_images[type]; } - else { - /* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */ - int tex_count = 0; - for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { - tex_count += tex_num_images[type]; - } - if(tex_count > max_num_images) { - printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n", - max_num_images, filename.c_str()); - return -1; - } + if(tex_count > max_num_images) { + printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n", + max_num_images, filename.c_str()); + return -1; } if(slot == images[type].size()) { diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 3519a67bc05..6fca3ca20d3 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -121,7 +121,6 @@ private: int tex_num_images[IMAGE_DATA_NUM_TYPES]; int max_num_images; bool has_half_images; - bool cuda_fermi_limits; thread_mutex device_mutex; int animation_frame; diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h index 498c21b9706..61ddcc38f50 100644 --- a/intern/cycles/util/util_math_intersect.h +++ b/intern/cycles/util/util_math_intersect.h @@ -79,12 +79,7 @@ ccl_device bool ray_aligned_disk_intersect( return true; } -#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 -ccl_device_inline -#else -ccl_device_forceinline -#endif -bool ray_triangle_intersect( +ccl_device_forceinline bool ray_triangle_intersect( float3 ray_P, float3 ray_dir, float ray_t, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) const ssef *ssef_verts, diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index cec03dc5e6e..4b5f630427d 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -20,22 +20,6 @@ CCL_NAMESPACE_BEGIN /* Texture limits on devices. */ - -/* CUDA (Geforce 4xx and 5xx) */ -#define TEX_NUM_FLOAT4_CUDA 5 -#define TEX_NUM_BYTE4_CUDA 84 -#define TEX_NUM_HALF4_CUDA 0 -#define TEX_NUM_FLOAT_CUDA 0 -#define TEX_NUM_BYTE_CUDA 0 -#define TEX_NUM_HALF_CUDA 0 -#define TEX_START_FLOAT4_CUDA 0 -#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA -#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) -#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA) -#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA) -#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) - -/* Any architecture other than old CUDA cards */ #define TEX_NUM_MAX (INT_MAX >> 4) /* Color to use when textures are not found. */ @@ -44,11 +28,8 @@ CCL_NAMESPACE_BEGIN #define TEX_IMAGE_MISSING_B 1 #define TEX_IMAGE_MISSING_A 1 -#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300) -# define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4) -#else -# define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK) -#endif +/* Texture type. */ +#define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK) /* Interpolation types for textures * cuda also use texture space to store other objects */ |