diff options
author | Ray Molenkamp <github@lazydodo.com> | 2018-02-03 20:59:09 +0300 |
---|---|---|
committer | Ray Molenkamp <github@lazydodo.com> | 2018-02-03 20:59:09 +0300 |
commit | a5052770b85fefe00511886429e6fc1f5056e1e8 (patch) | |
tree | 5dbe529f230833e71ee2504657ccec32364f4a01 /intern/cycles/app | |
parent | db989e1f118071aae6dcd9f29d10182bd5ebed0b (diff) |
cycles: Add an nvrtc based cubin cli compiler.
nvcc is very picky regarding compiler versions, which severely limits the host compilers we can use. This commit adds an nvrtc-based compiler that allows us to build the cubins even if the host compiler is unsupported. For details see D2913.
Differential Revision: http://developer.blender.org/D2913
Diffstat (limited to 'intern/cycles/app')
-rw-r--r-- | intern/cycles/app/CMakeLists.txt | 24 | ||||
-rw-r--r-- | intern/cycles/app/cycles_cubin_cc.cpp | 284 |
2 files changed, 308 insertions, 0 deletions
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index 08a3931ef46..9ebeceb1659 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -120,3 +120,27 @@ if(WITH_CYCLES_NETWORK)
 	endif()
 	unset(SRC)
 endif()
+
+if(WITH_CYCLES_CUBIN_COMPILER)
+	# 32 bit Windows is special: nvrtc is not supported on x86, so even
+	# though we are building 32 bit Blender, a 64 bit cubin_cc will have
+	# to be built to compile the cubins.
+	if(MSVC AND NOT CMAKE_CL_64)
+		Message("cycles_cubin_cc not supported on x86")
+	else()
+		set(SRC cycles_cubin_cc.cpp)
+		set(INC ../../../extern/cuew/include)
+		add_executable(cycles_cubin_cc ${SRC})
+		include_directories(${INC})
+		target_link_libraries(cycles_cubin_cc
+			extern_cuew
+			${OPENIMAGEIO_LIBRARIES}
+			${PLATFORM_LINKLIBS}
+		)
+		if(NOT CYCLES_STANDALONE_REPOSITORY)
+			target_link_libraries(cycles_cubin_cc bf_intern_guardedalloc)
+		endif()
+		unset(SRC)
+		unset(INC)
+	endif()
+endif()
diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp
new file mode 100644
index 00000000000..c1f3974be6d
--- /dev/null
+++ b/intern/cycles/app/cycles_cubin_cc.cpp
@@ -0,0 +1,284 @@
+/*
+ * Copyright 2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <string>
+#include <vector>
+
+#include <OpenImageIO/argparse.h>
+#include <OpenImageIO/filesystem.h>
+
+#include "cuew.h"
+
+#ifdef _MSC_VER
+# include <Windows.h>
+#endif
+
+using std::string;
+using std::vector;
+
+/* All state for one invocation of the tool.
+ *
+ * Everything except ptx_file is filled in by parse_parameters();
+ * ptx_file is set by compile_cuda() and consumed by link_ptxas(). */
+class CompilationSettings
+{
+public:
+	CompilationSettings()
+	: target_arch(0),
+	  bits(64),
+	  verbose(false),
+	  fast_math(false)
+	{}
+
+	string cuda_toolkit_dir;  /* -cuda-toolkit-dir, "/bin" is appended where needed. */
+	string input_file;        /* -i, source handed to nvrtc. */
+	string output_file;       /* -o, cubin written by ptxas. */
+	string ptx_file;          /* Temporary PTX file between the two stages. */
+	vector<string> defines;   /* -D, forwarded to nvrtc as -D<value>. */
+	vector<string> includes;  /* -I, forwarded to nvrtc as -I<value>. */
+	int target_arch;          /* -target, used as compute_<n> and sm_<n>. */
+	int bits;                 /* -m, forwarded to ptxas as -m<n>. */
+	bool verbose;             /* -v, adds --verbose to the ptxas command. */
+	bool fast_math;           /* --use_fast_math, forwarded to nvrtc. */
+};
+
+/* Compile settings.input_file to PTX with nvrtc.
+ *
+ * On success the PTX is written to a uniquely named file in the temp
+ * directory, its path is stored in settings.ptx_file and true is returned.
+ * On any failure a message is printed to stderr and false is returned. */
+bool compile_cuda(CompilationSettings &settings)
+{
+	/* Header names registered with nvrtcCreateProgram, each one backed by
+	 * a stub that contains only a newline. */
+	const char* headers[] = {"stdlib.h" , "float.h", "math.h", "stdio.h"};
+	const char* header_content[] = {"\n", "\n", "\n", "\n"};
+	const int num_headers = (int)(sizeof(headers) / sizeof(headers[0]));
+
+	printf("Building %s\n", settings.input_file.c_str());
+
+	string code;
+	if(!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
+		fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
+		return false;
+	}
+
+	vector<string> options;
+	for(size_t i = 0; i < settings.includes.size(); i++) {
+		options.push_back("-I" + settings.includes[i]);
+	}
+
+	for(size_t i = 0; i < settings.defines.size(); i++) {
+		options.push_back("-D" + settings.defines[i]);
+	}
+
+	options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
+	options.push_back("--device-as-default-execution-space");
+	if(settings.fast_math)
+		options.push_back("--use_fast_math");
+
+	nvrtcProgram prog;
+	nvrtcResult result = nvrtcCreateProgram(&prog,
+	                                        code.c_str(),    // buffer
+	                                        NULL,            // name
+	                                        num_headers,     // numHeaders
+	                                        header_content,  // headers
+	                                        headers);        // includeNames
+
+	if(result != NVRTC_SUCCESS) {
+		fprintf(stderr, "Error: nvrtcCreateProgram failed (%x)\n\n", result);
+		return false;
+	}
+
+	/* Transfer options to a classic C array. The strings stay owned by
+	 * `options`, which outlives the compile call. */
+	vector<const char*> opts(options.size());
+	for(size_t i = 0; i < options.size(); i++) {
+		opts[i] = options[i].c_str();
+	}
+
+	/* `opts` is never empty here: two options are always pushed above. */
+	result = nvrtcCompileProgram(prog, (int)opts.size(), &opts[0]);
+
+	if(result != NVRTC_SUCCESS) {
+		fprintf(stderr, "Error: nvrtcCompileProgram failed (%x)\n\n", result);
+
+		/* Only print the log when its size could actually be queried. */
+		size_t log_size = 0;
+		if(nvrtcGetProgramLogSize(prog, &log_size) == NVRTC_SUCCESS && log_size > 0) {
+			/* One extra zeroed byte keeps the buffer terminated for %s. */
+			vector<char> log(log_size + 1, '\0');
+			if(nvrtcGetProgramLog(prog, &log[0]) == NVRTC_SUCCESS) {
+				fprintf(stderr, "%s\n", &log[0]);
+			}
+		}
+
+		return false;
+	}
+
+	/* Retrieve the ptx code. */
+	size_t ptx_size = 0;
+	result = nvrtcGetPTXSize(prog, &ptx_size);
+	if(result != NVRTC_SUCCESS) {
+		fprintf(stderr, "Error: nvrtcGetPTXSize failed (%x)\n\n", result);
+		return false;
+	}
+
+	if(ptx_size == 0) {
+		fprintf(stderr, "Error: nvrtc produced no PTX\n\n");
+		return false;
+	}
+
+	vector<char> ptx_code(ptx_size);
+	result = nvrtcGetPTX(prog, &ptx_code[0]);
+	if(result != NVRTC_SUCCESS) {
+		fprintf(stderr, "Error: nvrtcGetPTX failed (%x)\n\n", result);
+		return false;
+	}
+
+	/* Write a file in the temp folder with the ptx code. */
+	settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + OIIO::Filesystem::unique_path();
+	FILE *f = fopen(settings.ptx_file.c_str(), "wb");
+	if(!f) {
+		fprintf(stderr, "Error: unable to open %s for writing\n\n", settings.ptx_file.c_str());
+		return false;
+	}
+
+	const size_t written = fwrite(&ptx_code[0], 1, ptx_size, f);
+	const bool closed = (fclose(f) == 0);
+	if(written != ptx_size || !closed) {
+		fprintf(stderr, "Error: unable to write %s\n\n", settings.ptx_file.c_str());
+		/* Do not leave a truncated file behind for ptxas to pick up. */
+		OIIO::Filesystem::remove(settings.ptx_file);
+		return false;
+	}
+
+	return true;
+}
+
+/* Run ptxas on settings.ptx_file to produce settings.output_file.
+ *
+ * The temporary PTX file is removed afterwards whether or not ptxas
+ * succeeded. Returns false if ptxas exits with a non-zero status. */
+bool link_ptxas(CompilationSettings &settings)
+{
+	string cudapath = "";
+	if(settings.cuda_toolkit_dir.size())
+		cudapath = settings.cuda_toolkit_dir + "/bin/";
+
+	/* Only the executable is quoted; the PTX and output paths are passed
+	 * as-is, so paths that contain spaces are split by the shell.
+	 * NOTE(review): quoting them as well needs checking on Windows, where
+	 * cmd.exe treats a command line with more than two quotes differently. */
+	string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file +
+	             " -o " + settings.output_file +
+	             " --gpu-name sm_" + std::to_string(settings.target_arch) +
+	             " -m" + std::to_string(settings.bits);
+
+	if(settings.verbose)
+		ptx += " --verbose";
+
+	const int pxresult = system(ptx.c_str());
+
+	/* The intermediate file has served its purpose either way. */
+	if(!OIIO::Filesystem::remove(settings.ptx_file)) {
+		fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
+	}
+
+	if(pxresult) {
+		fprintf(stderr, "Error: ptxas failed (%x)\n\n", pxresult);
+		return false;
+	}
+
+	return true;
+}
+
+/* Initialize cuew and verify that every nvrtc entry point used by
+ * compile_cuda() has been resolved.
+ *
+ * Returns false, after printing the reason to stderr, if cuew fails to
+ * initialize, the nvrtc version is below 80 or a symbol is missing. */
+bool init(CompilationSettings &settings)
+{
+#ifdef _MSC_VER
+	/* Add <cuda_toolkit_dir>/bin to the DLL search path before cuewInit().
+	 * The ANSI variant is called explicitly because the path is a narrow
+	 * string. */
+	if(settings.cuda_toolkit_dir.size()) {
+		SetDllDirectoryA((settings.cuda_toolkit_dir + "/bin").c_str());
+	}
+#else
+	(void)settings;
+#endif
+
+	int cuewresult = cuewInit();
+	if(cuewresult != CUEW_SUCCESS) {
+		fprintf(stderr, "Error: cuew init failed (0x%x)\n\n", cuewresult);
+		return false;
+	}
+
+	/* Report the same value that is being tested. */
+	const int nvrtc_version = cuewNvrtcVersion();
+	if(nvrtc_version < 80) {
+		fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", nvrtc_version);
+		return false;
+	}
+
+	if(!nvrtcCreateProgram) {
+		fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
+		return false;
+	}
+
+	if(!nvrtcCompileProgram) {
+		fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
+		return false;
+	}
+
+	if(!nvrtcGetProgramLogSize) {
+		fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
+		return false;
+	}
+
+	if(!nvrtcGetProgramLog) {
+		fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
+		return false;
+	}
+
+	if(!nvrtcGetPTXSize) {
+		fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
+		return false;
+	}
+
+	if(!nvrtcGetPTX) {
+		fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* Fill `settings` from the command line.
+ *
+ * Returns false, after printing the reason to stderr, when parsing fails
+ * or when the output file, input file or target shader model is missing. */
+bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
+{
+	OIIO::ArgParse ap;
+	ap.options("Usage: cycles_cubin_cc [options]",
+		"-target %d", &settings.target_arch, "target shader model",
+		"-m %d", &settings.bits, "Cuda architecture bits",
+		"-i %s", &settings.input_file, "Input source filename",
+		"-o %s", &settings.output_file, "Output cubin filename",
+		"-I %L", &settings.includes, "Add additional includepath",
+		"-D %L", &settings.defines, "Add additional defines",
+		"-v", &settings.verbose, "Use verbose logging",
+		"--use_fast_math", &settings.fast_math, "Use fast math",
+		"-cuda-toolkit-dir %s", &settings.cuda_toolkit_dir, "path to the cuda toolkit binary directory",
+		NULL);
+
+	if(ap.parse(argc, argv) < 0) {
+		fprintf(stderr, "%s\n", ap.geterror().c_str());
+		ap.usage();
+		return false;
+	}
+
+	if(!settings.output_file.size()) {
+		fprintf(stderr, "Error: Output file not set (-o), required\n\n");
+		return false;
+	}
+
+	if(!settings.input_file.size()) {
+		fprintf(stderr, "Error: Input file not set (-i), required\n\n");
+		return false;
+	}
+
+	if(!settings.target_arch) {
+		fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* Entry point: parse arguments, load nvrtc through cuew, compile the
+ * input to PTX and let ptxas turn it into a cubin. Each stage prints its
+ * own diagnostics; the process exits with EXIT_FAILURE at the first stage
+ * that fails. */
+int main(int argc, const char **argv)
+{
+	CompilationSettings settings;
+
+	if(!parse_parameters(argc, argv, settings)) {
+		fprintf(stderr, "Error: invalid parameters, exiting\n");
+		return EXIT_FAILURE;
+	}
+
+	if(!init(settings)) {
+		fprintf(stderr, "Error: initialization error, exiting\n");
+		return EXIT_FAILURE;
+	}
+
+	if(!compile_cuda(settings)) {
+		fprintf(stderr, "Error: compilation error, exiting\n");
+		return EXIT_FAILURE;
+	}
+
+	if(!link_ptxas(settings)) {
+		fprintf(stderr, "Error: linking error, exiting\n");
+		return EXIT_FAILURE;
+	}
+
+	return 0;
+}