diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-02-15 17:40:39 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-02-15 23:26:29 +0300 |
commit | 1c4f21f85e17ac557c9867a7764a31d5ebe74eb0 (patch) | |
tree | 72ea537d6624e43e452ce7465f25f92832b88e4f /intern/cycles/device/device_cuda.cpp | |
parent | b5171e250c6816ecce26227615d53cf6f6339892 (diff) |
Cycles: Initial support of 3D textures for CUDA rendering
Supports both smoke/fire and point density textures now.
Reduces the number of textures available on sm_20 and sm_21, but you have
to compromise somewhere on such limited hardware.
Currently limited to linear interpolation only, and decoupled ray
marching is not supported yet. I think those can be treated as
further improvements.
A quick example:
https://developer.blender.org/F282934
The code is minimal, and we can fully consider it a fix for the missing
support of 3D textures with CUDA.
Reviewers: lukasstockner97, brecht, juicyfruit, dingto
Reviewed By: brecht, juicyfruit, dingto
Subscribers: mib2berlin
Differential Revision: https://developer.blender.org/D1806
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 58 |
1 files changed, 49 insertions, 9 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 3e3cd7515c7..98997ae0968 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -474,9 +474,20 @@ public: InterpolationType interpolation, ExtensionType extension) { - /* todo: support 3D textures, only CPU for now */ VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes."; + string bind_name = name; + if(mem.data_depth > 1) { + /* Kernel uses different bind names for 2d and 3d float textures, + * so we have to adjust couple of things here. + */ + vector<string> tokens; + string_split(tokens, name, "_"); + bind_name = string_printf("__tex_image_%s3d_%s", + tokens[2].c_str(), + tokens[3].c_str()); + } + /* determine format */ CUarray_format_enum format; size_t dsize = datatype_size(mem.data_type); @@ -496,7 +507,7 @@ public: CUtexref texref = NULL; cuda_push_context(); - cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)); + cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str())); if(!texref) { cuda_pop_context(); @@ -505,20 +516,49 @@ public: if(interpolation != INTERPOLATION_NONE) { CUarray handle = NULL; - CUDA_ARRAY_DESCRIPTOR desc; - desc.Width = mem.data_width; - desc.Height = mem.data_height; - desc.Format = format; - desc.NumChannels = mem.data_elements; + if(mem.data_depth > 1) { + CUDA_ARRAY3D_DESCRIPTOR desc; + + desc.Width = mem.data_width; + desc.Height = mem.data_height; + desc.Depth = mem.data_depth; + desc.Format = format; + desc.NumChannels = mem.data_elements; + desc.Flags = 0; + + cuda_assert(cuArray3DCreate(&handle, &desc)); + } + else { + CUDA_ARRAY_DESCRIPTOR desc; - cuda_assert(cuArrayCreate(&handle, &desc)); + desc.Width = mem.data_width; + desc.Height = mem.data_height; + desc.Format = format; + desc.NumChannels = mem.data_elements; + + cuda_assert(cuArrayCreate(&handle, &desc)); + } if(!handle) { cuda_pop_context(); return; } + if(mem.data_depth > 1) { + 
CUDA_MEMCPY3D param; + memset(&param, 0, sizeof(param)); + param.dstMemoryType = CU_MEMORYTYPE_ARRAY; + param.dstArray = handle; + param.srcMemoryType = CU_MEMORYTYPE_HOST; + param.srcHost = (void*)mem.data_pointer; + param.srcPitch = mem.data_width*dsize*mem.data_elements; + param.WidthInBytes = param.srcPitch; + param.Height = mem.data_height; + param.Depth = mem.data_depth; + + cuda_assert(cuMemcpy3D(&param)); + } if(mem.data_height > 1) { CUDA_MEMCPY2D param; memset(&param, 0, sizeof(param)); @@ -595,7 +635,7 @@ public: CUdeviceptr cumem; size_t cubytes; - cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)); + cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str())); if(cubytes == 8) { /* 64 bit device pointer */ |