6 files changed, 125 insertions, 137 deletions
diff --git a/extern/cuew/src/cuew.c b/extern/cuew/src/cuew.c
index a0146741494..f477ec48a18 100644
--- a/extern/cuew/src/cuew.c
+++ b/extern/cuew/src/cuew.c
@@ -683,23 +683,23 @@ static int cuewNvrtcInit(void) {
 
 
 int cuewInit(cuuint32_t flags) {
-	int result = CUEW_SUCCESS;
-
-	if (flags & CUEW_INIT_CUDA) {
-		result = cuewCudaInit();
-		if (result != CUEW_SUCCESS) {
-			return result;
-		}
-	}
-
-	if (flags & CUEW_INIT_NVRTC) {
-		result = cuewNvrtcInit();
-		if (result != CUEW_SUCCESS) {
-			return result;
-		}
-	}
-
-	return result;
+  /* Load only the libraries selected in flags; the first failure is returned as-is. */
+  if (flags & CUEW_INIT_CUDA) {
+    const int cuda_result = cuewCudaInit();
+    if (cuda_result != CUEW_SUCCESS) {
+      return cuda_result;
+    }
+  }
+
+  if (flags & CUEW_INIT_NVRTC) {
+    const int nvrtc_result = cuewNvrtcInit();
+    if (nvrtc_result != CUEW_SUCCESS) {
+      return nvrtc_result;
+    }
+  }
+
+  /* Nothing was requested, or every requested library loaded successfully. */
+  return CUEW_SUCCESS;
 }
 
 
@@ -798,7 +798,10 @@ static int path_exists(const char *path) {
 
 const char *cuewCompilerPath(void) {
 #ifdef _WIN32
-  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
+  const char *defaultpaths[] = {
+    "C:/CUDA/bin",
+    "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin",
+    NULL};
   const char *executable = "nvcc.exe";
 #else
   const char *defaultpaths[] = {
@@ -832,9 +835,12 @@ const char *cuewCompilerPath(void) {
     }
   }
 
-#ifndef _WIN32
   {
+#ifdef _WIN32
+    FILE *handle = popen("where nvcc", "r");
+#else
     FILE *handle = popen("which nvcc", "r");
+#endif
     if (handle) {
       char buffer[4096] = {0};
       int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
@@ -845,7 +851,6 @@ const char *cuewCompilerPath(void) {
       }
     }
   }
-#endif
 
   return NULL;
 }
@@ -859,23 +864,6 @@ int cuewNvrtcVersion(void) {
   return 0;
 }
 
-static size_t safe_strnlen(const char *s, size_t maxlen) {
-  size_t length;
-  for (length = 0; length < maxlen; s++, length++) {
-    if (*s == '\0') {
-      break;
-    }
-  }
-  return length;
-}
-
-static char *safe_strncpy(char *dest, const char *src, size_t n) {
-  const size_t src_len = safe_strnlen(src, n - 1);
-  memcpy(dest, src, src_len);
-  dest[src_len] = '\0';
-  return dest;
-}
-
 int cuewCompilerVersion(void) {
   const char *path = cuewCompilerPath();
   const char *marker = "Cuda compilation tools, release ";
@@ -891,8 +879,9 @@ int cuewCompilerVersion(void) {
   }
 
   /* get --version output */
-  safe_strncpy(command, path, sizeof(command));
-  strncat(command, " --version", sizeof(command) - strlen(path));
+  strncat(command, "\"", sizeof(command) - strlen(command) - 1); /* quote: path may contain spaces */
+  strncat(command, path, sizeof(command) - strlen(command) - 1); /* n = room left, minus the NUL */
+  strncat(command, "\" --version", sizeof(command) - strlen(command) - 1);
   pipe = popen(command, "r");
   if (!pipe) {
     fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
@@ -922,4 +911,3 @@ int cuewCompilerVersion(void) {
 
   return 10 * major + minor;
 }
-
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 6f6bd7ec2cc..1014831c403 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -228,11 +228,8 @@ if(WITH_CYCLES_DEVICE_OPTIX)
       SYSTEM
       ${OPTIX_INCLUDE_DIR}
       )
-
-    # Need pre-compiled CUDA binaries in the OptiX device
-    set(WITH_CYCLES_CUDA_BINARIES ON)
   else()
-    message(STATUS "Optix not found, disabling it from Cycles")
+    message(STATUS "OptiX not found, disabling it from Cycles")
     set(WITH_CYCLES_DEVICE_OPTIX OFF)
   endif()
 endif()
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 0f4543e6007..5820b525fd6 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -109,15 +109,13 @@ class CUDADevice : public Device {
 
   bool use_split_kernel();
 
-  string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
-                                          bool filter = false,
-                                          bool split = false);
-
-  bool compile_check_compiler();
+  virtual string compile_kernel_get_common_cflags(
+      const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
 
   string compile_kernel(const DeviceRequestedFeatures &requested_features,
-                        bool filter = false,
-                        bool split = false);
+                        const char *name,
+                        const char *base = "cuda",
+                        bool force_ptx = false);
 
   virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);
 
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index a4e1c026263..cd37c4dd407 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -329,70 +329,27 @@ string CUDADevice::compile_kernel_get_common_cflags(
   return cflags;
 }
 
-bool CUDADevice::compile_check_compiler()
-{
-  const char *nvcc = cuewCompilerPath();
-  if (nvcc == NULL) {
-    cuda_error_message(
-        "CUDA nvcc compiler not found. "
-        "Install CUDA toolkit in default location.");
-    return false;
-  }
-  const int cuda_version = cuewCompilerVersion();
-  VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
-  const int major = cuda_version / 10, minor = cuda_version % 10;
-  if (cuda_version == 0) {
-    cuda_error_message("CUDA nvcc compiler version could not be parsed.");
-    return false;
-  }
-  if (cuda_version < 80) {
-    printf(
-        "Unsupported CUDA version %d.%d detected, "
-        "you need CUDA 8.0 or newer.\n",
-        major,
-        minor);
-    return false;
-  }
-  else if (cuda_version != 101) {
-    printf(
-        "CUDA version %d.%d detected, build may succeed but only "
-        "CUDA 10.1 is officially supported.\n",
-        major,
-        minor);
-  }
-  return true;
-}
-
 string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_features,
-                                  bool filter,
-                                  bool split)
+                                  const char *name,
+                                  const char *base,
+                                  bool force_ptx)
 {
-  const char *name, *source;
-  if (filter) {
-    name = "filter";
-    source = "filter.cu";
-  }
-  else if (split) {
-    name = "kernel_split";
-    source = "kernel_split.cu";
-  }
-  else {
-    name = "kernel";
-    source = "kernel.cu";
-  }
-  /* Compute cubin name. */
+  /* Query the device compute capability; it is part of the kernel file name. */
   int major, minor;
   cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
   cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
 
   /* Attempt to use kernel provided with Blender. */
   if (!use_adaptive_compilation()) {
-    const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
-    VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
-    if (path_exists(cubin)) {
-      VLOG(1) << "Using precompiled kernel.";
-      return cubin;
+    if (!force_ptx) {
+      const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+      VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+      if (path_exists(cubin)) {
+        VLOG(1) << "Using precompiled kernel.";
+        return cubin;
+      }
     }
+
     const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
     VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
     if (path_exists(ptx)) {
@@ -401,19 +358,21 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
     }
   }
 
-  const string common_cflags = compile_kernel_get_common_cflags(requested_features, filter, split);
-
   /* Try to use locally compiled kernel. */
-  const string source_path = path_get("source");
-  const string kernel_md5 = path_files_md5_hash(source_path);
+  string source_path = path_get("source");
+  const string source_md5 = path_files_md5_hash(source_path);
 
   /* We include cflags into md5 so changing cuda toolkit or changing other
    * compiler command line arguments makes sure cubin gets re-built.
    */
-  const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
+  string common_cflags = compile_kernel_get_common_cflags(
+      requested_features, strstr(name, "filter") != NULL, strstr(name, "split") != NULL);
+  const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
 
+  const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
+  const char *const kernel_arch = force_ptx ? "compute" : "sm";
   const string cubin_file = string_printf(
-      "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
+      "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
   const string cubin = path_cache_get(path_join("kernels", cubin_file));
   VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
   if (path_exists(cubin)) {
@@ -422,7 +381,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
   }
 
 #  ifdef _WIN32
-  if (have_precompiled_kernels()) {
+  if (!use_adaptive_compilation() && have_precompiled_kernels()) {
     if (major < 3) {
       cuda_error_message(
           string_printf("CUDA device requires compute capability 3.0 or up, "
@@ -437,42 +396,69 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
                         major,
                         minor));
     }
-    return "";
+    return string();
   }
 #  endif
 
   /* Compile. */
-  if (!compile_check_compiler()) {
-    return "";
+  const char *const nvcc = cuewCompilerPath();
+  if (nvcc == NULL) {
+    cuda_error_message(
+        "CUDA nvcc compiler not found. "
+        "Install CUDA toolkit in default location.");
+    return string();
   }
-  const char *nvcc = cuewCompilerPath();
-  const string kernel = path_join(path_join(source_path, "kernel"),
-                                  path_join("kernels", path_join("cuda", source)));
+
+  const int nvcc_cuda_version = cuewCompilerVersion();
+  VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << nvcc_cuda_version << ".";
+  if (nvcc_cuda_version < 80) {
+    printf(
+        "Unsupported CUDA version %d.%d detected, "
+        "you need CUDA 8.0 or newer.\n",
+        nvcc_cuda_version / 10,
+        nvcc_cuda_version % 10);
+    return string();
+  }
+  else if (nvcc_cuda_version != 101) {
+    printf(
+        "CUDA version %d.%d detected, build may succeed but only "
+        "CUDA 10.1 is officially supported.\n",
+        nvcc_cuda_version / 10,
+        nvcc_cuda_version % 10);
+  }
+
   double starttime = time_dt();
-  printf("Compiling CUDA kernel ...\n");
 
   path_create_directories(cubin);
 
+  source_path = path_join(path_join(source_path, "kernel"),
+                          path_join("kernels", path_join(base, string_printf("%s.cu", name))));
+
   string command = string_printf(
       "\"%s\" "
-      "-arch=sm_%d%d "
-      "--cubin \"%s\" "
+      "-arch=%s_%d%d "
+      "--%s \"%s\" "
       "-o \"%s\" "
-      "%s ",
+      "%s",
       nvcc,
+      kernel_arch,
       major,
       minor,
-      kernel.c_str(),
+      kernel_ext,
+      source_path.c_str(),
       cubin.c_str(),
       common_cflags.c_str());
 
-  printf("%s\n", command.c_str());
+  printf("Compiling CUDA kernel ...\n%s\n", command.c_str());
 
-  if (system(command.c_str()) == -1) {
+#ifdef _WIN32
+  command = "call " + command;
+#endif
+  if (system(command.c_str()) != 0) {
     cuda_error_message(
         "Failed to execute compilation command, "
         "see console for details.");
-    return "";
+    return string();
   }
 
   /* Verify if compilation succeeded */
@@ -480,7 +466,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
     cuda_error_message(
         "CUDA kernel compilation failed, "
         "see console for details.");
-    return "";
+    return string();
   }
 
   printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
@@ -509,12 +495,14 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
     return false;
 
   /* get kernel */
-  string cubin = compile_kernel(requested_features, false, use_split_kernel());
-  if (cubin == "")
+  const char *kernel_name = use_split_kernel() ? "kernel_split" : "kernel";
+  string cubin = compile_kernel(requested_features, kernel_name);
+  if (cubin.empty())
     return false;
 
-  string filter_cubin = compile_kernel(requested_features, true, false);
-  if (filter_cubin == "")
+  const char *filter_name = "filter";
+  string filter_cubin = compile_kernel(requested_features, filter_name);
+  if (filter_cubin.empty())
     return false;
 
   /* open module */
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 5f991563738..6957adb478f 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -43,7 +43,6 @@ bool device_cuda_init()
       VLOG(1) << "Found precompiled kernels";
       result = true;
     }
-#    ifndef _WIN32
     else if (cuewCompilerPath() != NULL) {
       VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
       result = true;
@@ -52,7 +51,6 @@ bool device_cuda_init()
       VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
               << " unable to use CUDA";
     }
-#    endif
   }
   else {
     VLOG(1) << "CUEW initialization failed: "
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 39110cc0959..2ce8bed3783 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -293,6 +293,23 @@ class OptiXDevice : public CUDADevice {
     return BVH_LAYOUT_OPTIX;
   }
 
+  /* Build the compiler flag string for OptiX kernels: CUDA flags plus the OptiX include path. */
+  string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+                                          bool filter,
+                                          bool /*split*/) override
+  {
+    /* OptiX has no split kernel, so always ask the CUDA base class for non-split flags. */
+    string cflags = CUDADevice::compile_kernel_get_common_cflags(
+        requested_features, filter, false);
+
+    /* Append the OptiX SDK include directory when OPTIX_ROOT_DIR is set in the environment. */
+    if (const char *sdk_root = getenv("OPTIX_ROOT_DIR")) {
+      cflags += string_printf(" -I\"%s/include\"", sdk_root);
+    }
+
+    return cflags;
+  }
+
   bool load_kernels(const DeviceRequestedFeatures &requested_features) override
   {
     if (have_error()) {
@@ -367,9 +384,11 @@ class OptiXDevice : public CUDADevice {
     }
 
     {  // Load and compile PTX module with OptiX kernels
-      string ptx_data;
-      const string ptx_filename = "lib/kernel_optix.ptx";
-      if (!path_read_text(path_get(ptx_filename), ptx_data)) {
+      string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+      if (use_adaptive_compilation()) {
+        ptx_filename = compile_kernel(requested_features, "kernel_optix", "optix", true);
+      }
+      if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
         set_error("Failed loading OptiX kernel " + ptx_filename + ".");
         return false;
       }