Cycles: Initial support of 3D textures for CUDA rendering

Supports both smoke/fire and point density textures now. This reduces the number of textures available on sm_20 and sm_21, but a compromise has to be made somewhere on such limited hardware. Currently limited to linear interpolation only, and decoupled ray marching is not supported yet; both can be considered further improvements. A quick example: https://developer.blender.org/F282934 The code is minimal, so this can be fully considered a fix for the missing support of 3D textures with CUDA. Reviewers: lukasstockner97, brecht, juicyfruit, dingto Reviewed By: brecht, juicyfruit, dingto Subscribers: mib2berlin Differential Revision: https://developer.blender.org/D1806
author: Sergey Sharybin <sergey.vfx@gmail.com> 2016-02-15 17:40:39 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2016-02-15 23:26:29 +0300
commit: 1c4f21f85e17ac557c9867a7764a31d5ebe74eb0 (patch)
tree: 72ea537d6624e43e452ce7465f25f92832b88e4f /intern/cycles/device/device_cuda.cpp
parent: b5171e250c6816ecce26227615d53cf6f6339892 (diff)
1 files changed, 49 insertions, 9 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 3e3cd7515c7..98997ae0968 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -474,9 +474,20 @@ public:
 	               InterpolationType interpolation,
 	               ExtensionType extension)
 	{
-		/* todo: support 3D textures, only CPU for now */
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
+		string bind_name = name;
+		if(mem.data_depth > 1) {
+			/* Kernel uses different bind names for 2d and 3d float textures,
+			 * so we have to adjust couple of things here.
+			 */
+			vector<string> tokens;
+			string_split(tokens, name, "_");
+			bind_name = string_printf("__tex_image_%s3d_%s",
+			                          tokens[2].c_str(),
+			                          tokens[3].c_str());
+		}
+
 		/* determine format */
 		CUarray_format_enum format;
 		size_t dsize = datatype_size(mem.data_type);
@@ -496,7 +507,7 @@ public:
 			CUtexref texref = NULL;
 
 			cuda_push_context();
-			cuda_assert(cuModuleGetTexRef(&texref, cuModule, name));
+			cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
 
 			if(!texref) {
 				cuda_pop_context();
@@ -505,20 +516,49 @@ public:
 
 			if(interpolation != INTERPOLATION_NONE) {
 				CUarray handle = NULL;
-				CUDA_ARRAY_DESCRIPTOR desc;
 
-				desc.Width = mem.data_width;
-				desc.Height = mem.data_height;
-				desc.Format = format;
-				desc.NumChannels = mem.data_elements;
+				if(mem.data_depth > 1) {
+					CUDA_ARRAY3D_DESCRIPTOR desc;
+
+					desc.Width = mem.data_width;
+					desc.Height = mem.data_height;
+					desc.Depth = mem.data_depth;
+					desc.Format = format;
+					desc.NumChannels = mem.data_elements;
+					desc.Flags = 0;
+
+					cuda_assert(cuArray3DCreate(&handle, &desc));
+				}
+				else {
+					CUDA_ARRAY_DESCRIPTOR desc;
 
-				cuda_assert(cuArrayCreate(&handle, &desc));
+					desc.Width = mem.data_width;
+					desc.Height = mem.data_height;
+					desc.Format = format;
+					desc.NumChannels = mem.data_elements;
+
+					cuda_assert(cuArrayCreate(&handle, &desc));
+				}
 
 				if(!handle) {
 					cuda_pop_context();
 					return;
 				}
 
+				if(mem.data_depth > 1) {
+					CUDA_MEMCPY3D param;
+					memset(&param, 0, sizeof(param));
+					param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+					param.dstArray = handle;
+					param.srcMemoryType = CU_MEMORYTYPE_HOST;
+					param.srcHost = (void*)mem.data_pointer;
+					param.srcPitch = mem.data_width*dsize*mem.data_elements;
+					param.WidthInBytes = param.srcPitch;
+					param.Height = mem.data_height;
+					param.Depth = mem.data_depth;
+
+					cuda_assert(cuMemcpy3D(&param));
+				}
 				if(mem.data_height > 1) {
 					CUDA_MEMCPY2D param;
 					memset(&param, 0, sizeof(param));
@@ -595,7 +635,7 @@ public:
 			CUdeviceptr cumem;
 			size_t cubytes;
 
-			cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name));
+			cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
 
 			if(cubytes == 8) {
 				/* 64 bit device pointer */
author	Sergey Sharybin <sergey.vfx@gmail.com>	2016-02-15 17:40:39 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2016-02-15 23:26:29 +0300
commit	1c4f21f85e17ac557c9867a7764a31d5ebe74eb0 (patch)
tree	72ea537d6624e43e452ce7465f25f92832b88e4f /intern/cycles/device/device_cuda.cpp
parent	b5171e250c6816ecce26227615d53cf6f6339892 (diff)