git.blender.org/blender.git
author    Sergey Sharybin <sergey.vfx@gmail.com>    2016-08-02 12:24:42 +0300
committer Sergey Sharybin <sergey.vfx@gmail.com>    2016-08-02 15:09:34 +0300
commit    ad48f13099c603ecb4358a1d0d304d2ec30e59d5 (patch)
tree      8b2700e26108c269ea0c3eac280f0044fcf92515 /intern/cycles/device/device_cuda.cpp
parent    fb94f4b884245dc4fa2961d775f7b1e5c8d16d22 (diff)
Cycles: Include NVCC compiler flags into md5 hash
This way we can easily switch between toolkits without worrying whether some kernel was compiled with an old or a new CUDA toolkit. It's also now possible to switch the machine architecture and have the proper cached kernel detected. Not that it happens every day, but I did such a bitness switch back in the day :)
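The gist of the change, as a minimal standalone sketch rather than the actual Cycles code (std::hash stands in for the md5 helpers, and all names below are illustrative): the cubin cache key is derived from both the kernel sources hash and the full NVCC flag string, so a different toolkit version, bitness or extra flag set produces a different cache file name and forces a rebuild.

#include <cstdio>
#include <functional>
#include <string>

/* Hypothetical stand-in for a string md5 helper; any stable digest of a
 * string is enough to illustrate the caching idea. */
static std::string digest(const std::string &data)
{
	return std::to_string(std::hash<std::string>{}(data));
}

/* The cache key mixes the kernel sources hash with the full compiler flag
 * string, so changing toolkit version, bitness or extra flags yields a
 * different cubin name and the stale cached kernel is never picked up. */
static std::string cubin_name(int major, int minor,
                              const std::string &kernel_sources_md5,
                              const std::string &common_cflags)
{
	char buf[256];
	std::snprintf(buf, sizeof(buf), "cycles_kernel_sm%d%d_%s.cubin",
	              major, minor,
	              digest(kernel_sources_md5 + common_cflags).c_str());
	return buf;
}

int main()
{
	const std::string kernel_md5 = "0123456789abcdef"; /* Hash of kernel sources. */
	/* Same sources, different toolkits: the two cache keys differ. */
	std::printf("%s\n", cubin_name(5, 2, kernel_md5,
	                               "-m64 -DNVCC -D__KERNEL_CUDA_VERSION__=75").c_str());
	std::printf("%s\n", cubin_name(5, 2, kernel_md5,
	                               "-m64 -DNVCC -D__KERNEL_CUDA_VERSION__=80").c_str());
	return 0;
}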
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r--   intern/cycles/device/device_cuda.cpp   189
1 file changed, 114 insertions(+), 75 deletions(-)
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 80f2644fa8c..f88520479df 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -249,121 +249,160 @@ public:
return DebugFlags().cuda.adaptive_compile;
}
+ /* Common NVCC flags which stay the same regardless of shading model and
+ * kernel sources md5, and only depend on compiler or compilation settings.
+ */
+ string compile_kernel_get_common_cflags(
+ const DeviceRequestedFeatures& requested_features)
+ {
+ const int cuda_version = cuewCompilerVersion();
+ const int machine = system_cpu_bits();
+ const string kernel_path = path_get("kernel");
+ const string include = kernel_path;
+ string cflags = string_printf("-m%d "
+ "--ptxas-options=\"-v\" "
+ "--use_fast_math "
+ "-DNVCC "
+ "-D__KERNEL_CUDA_VERSION__=%d "
+ "-I\"%s\"",
+ machine,
+ cuda_version,
+ include.c_str());
+ if(use_adaptive_compilation()) {
+ cflags += " " + requested_features.get_build_options();
+ }
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+ if(extra_cflags) {
+ cflags += string(" ") + string(extra_cflags);
+ }
+#ifdef WITH_CYCLES_DEBUG
+ cflags += " -D__KERNEL_DEBUG__";
+#endif
+ return cflags;
+ }
+
+ bool compile_check_compiler() {
+ const char *nvcc = cuewCompilerPath();
+ if(nvcc == NULL) {
+ cuda_error_message("CUDA nvcc compiler not found. "
+ "Install CUDA toolkit in default location.");
+ return false;
+ }
+ int cuda_version = cuewCompilerVersion();
+ VLOG(1) << "Found nvcc " << nvcc
+ << ", CUDA version " << cuda_version
+ << ".";
+ if(cuda_version == 0) {
+ cuda_error_message("CUDA nvcc compiler version could not be parsed.");
+ return false;
+ }
+ if(cuda_version < 60) {
+ printf("Unsupported CUDA version %d.%d detected, "
+ "you need CUDA 7.5 or newer.\n",
+ cuda_version / 10, cuda_version % 10);
+ return false;
+ }
+ else if(cuda_version != 75 && cuda_version != 80) {
+ printf("CUDA version %d.%d detected, build may succeed but only "
+ "CUDA 7.5 and 8.0 are officially supported.\n",
+ cuda_version / 10, cuda_version % 10);
+ }
+ return true;
+ }
+
string compile_kernel(const DeviceRequestedFeatures& requested_features)
{
/* Compute cubin name. */
int major, minor;
cuDeviceComputeCapability(&major, &minor, cuDevId);
- string cubin;
-
- /* Adaptive Compile.
- * If enabled, always use that */
- bool use_adaptive_compile = use_adaptive_compilation();
/* Attempt to use kernel provided with Blender. */
- if(!use_adaptive_compile) {
- cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
- VLOG(1) << "Testing for pre-compiled kernel " << cubin;
+ if(!use_adaptive_compilation()) {
+ const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin",
+ major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
if(path_exists(cubin)) {
- VLOG(1) << "Using precompiled kernel";
+ VLOG(1) << "Using precompiled kernel.";
return cubin;
}
}
+ const string common_cflags =
+ compile_kernel_get_common_cflags(requested_features);
+
/* Try to use locally compiled kernel. */
- string kernel_path = path_get("kernel");
- string md5 = path_files_md5_hash(kernel_path);
-
- string feature_build_options;
- if(use_adaptive_compile) {
- feature_build_options = requested_features.get_build_options();
- string device_md5 = util_md5_string(feature_build_options);
- cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin",
- device_md5.c_str(),
- major, minor,
- md5.c_str());
- }
- else {
- cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
- }
+ const string kernel_path = path_get("kernel");
+ const string kernel_md5 = path_files_md5_hash(kernel_path);
- cubin = path_user_get(path_join("cache", cubin));
- VLOG(1) << "Testing for locally compiled kernel " << cubin;
- /* If exists already, use it. */
+ /* We include cflags into the md5 so that changing the CUDA toolkit or other
+ * compiler command line arguments makes sure the cubin gets re-built.
+ */
+ const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
+
+ const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin",
+ major, minor,
+ cubin_md5.c_str());
+ const string cubin = path_user_get(path_join("cache", cubin_file));
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
if(path_exists(cubin)) {
- VLOG(1) << "Using locally compiled kernel";
+ VLOG(1) << "Using locally compiled kernel.";
return cubin;
}
#ifdef _WIN32
if(have_precompiled_kernels()) {
- if(major < 2)
- cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor));
- else
- cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor));
+ if(major < 2) {
+ cuda_error_message(string_printf(
+ "CUDA device requires compute capability 2.0 or up, "
+ "found %d.%d. Your GPU is not supported.",
+ major, minor));
+ }
+ else {
+ cuda_error_message(string_printf(
+ "CUDA binary kernel for this graphics card compute "
+ "capability (%d.%d) not found.",
+ major, minor));
+ }
return "";
}
#endif
- /* If not, find CUDA compiler. */
- const char *nvcc = cuewCompilerPath();
-
- if(nvcc == NULL) {
- cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location.");
- return "";
- }
-
- int cuda_version = cuewCompilerVersion();
- VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version;
-
- if(cuda_version == 0) {
- cuda_error_message("CUDA nvcc compiler version could not be parsed.");
- return "";
- }
- if(cuda_version < 60) {
- printf("Unsupported CUDA version %d.%d detected, you need CUDA 7.5 or newer.\n", cuda_version/10, cuda_version%10);
+ /* Compile. */
+ if(!compile_check_compiler()) {
return "";
}
- else if(cuda_version != 75 && cuda_version != 80)
- printf("CUDA version %d.%d detected, build may succeed but only CUDA 7.5 and 8.0 are officially supported.\n", cuda_version/10, cuda_version%10);
-
- /* Compile. */
- string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu")));
- string include = kernel_path;
- const int machine = system_cpu_bits();
-
+ const char *nvcc = cuewCompilerPath();
+ string kernel = path_join(kernel_path,
+ path_join("kernels",
+ path_join("cuda", "kernel.cu")));
double starttime = time_dt();
printf("Compiling CUDA kernel ...\n");
path_create_directories(cubin);
- string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
- "-o \"%s\" --ptxas-options=\"-v\" --use_fast_math -I\"%s\" "
- "-DNVCC -D__KERNEL_CUDA_VERSION__=%d",
- nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
-
- if(use_adaptive_compile)
- command += " " + feature_build_options;
-
- const char* extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
- if(extra_cflags) {
- command += string(" ") + string(extra_cflags);
- }
-
-#ifdef WITH_CYCLES_DEBUG
- command += " -D__KERNEL_DEBUG__";
-#endif
+ string command = string_printf("\"%s\" "
+ "-arch=sm_%d%d "
+ "--cubin \"%s\" "
+ "-o \"%s\" "
+ "%s ",
+ nvcc,
+ major, minor,
+ kernel.c_str(),
+ cubin.c_str(),
+ common_cflags.c_str());
printf("%s\n", command.c_str());
if(system(command.c_str()) == -1) {
- cuda_error_message("Failed to execute compilation command, see console for details.");
+ cuda_error_message("Failed to execute compilation command, "
+ "see console for details.");
return "";
}
/* Verify if compilation succeeded */
if(!path_exists(cubin)) {
- cuda_error_message("CUDA kernel compilation failed, see console for details.");
+ cuda_error_message("CUDA kernel compilation failed, "
+ "see console for details.");
return "";
}