diff options
author | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2018-12-08 00:37:05 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2018-12-08 00:37:05 +0300 |
commit | 8b4b3dae1e4b1d1322c2ec677e051c85b4d788d7 (patch) | |
tree | 60f702ae578d4138cfb06b1a8512a85ddb67a677 | |
parent | ed25aa02fca90a1a748ac806064a5630c653183e (diff) |
Address review comments
-rw-r--r-- | CMakeLists.txt | 19 | ||||
-rwxr-xr-x | src/3rd_party/cnpy/cnpy.cpp | 10 | ||||
-rw-r--r-- | src/tensors/cpu/device.cpp | 6 | ||||
-rwxr-xr-x | src/training/communicator_nccl.h | 3 |
4 files changed, 31 insertions, 7 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 498247e9..943e3e36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,9 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}") execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +# Detect support CPU instrinsics for the current platform. This will +# only by used with BUILD_ARCH=native. For overridden BUILD_ARCH we +# minimally use -msse4.1. This seems to work with MKL. set(INTRINSICS "") if(BUILD_ARCH STREQUAL "native") message(STATUS "Checking support for CPU intrinsics") @@ -75,8 +78,7 @@ else() # These are used in src/CMakeLists.txt on a per-target basis list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function; - -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers; - -Wno-unused-but-set-variable) + -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;) # This warning does not exist prior to gcc 5.0 if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) @@ -144,13 +146,16 @@ endif(USE_STATIC_LIBS) list(APPEND CUDA_NVCC_FLAGS -DBOOST_PP_VARIADICS=0; ) endif() + # We compile NCCL ourselves, using the NVidia Makefile rather than CMake, this requires to pass a couple of parameters from + # Cmake. This is also fairly untested, let's hope it does not explode. + # @TODO: Make sure it does not use pre-installed NCCL headers if(USE_NCCL) # define and set the include dir for the generated nccl.h header set(NCCL_HEADER_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/nccl/include") include_directories(${NCCL_HEADER_LOCATION}) # set the path for the generated static lib - set(NCCL_STATIC "${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a") + set(NCCL_LIB_STATIC "${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_NCCL") LIST(APPEND CUDA_NVCC_FLAGS -DUSE_NCCL; ) @@ -158,7 +163,9 @@ endif(USE_STATIC_LIBS) # disables compilation for sm_30 to avoid ptxas warning... that's general Kepler support. But K80s are supported for instance by sm_35 set(GENCODE "-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61") - # sets output to build, passes cuda location from FindCUDA, sets c++ compiler to the same one CMake uses. + # We build using NVidia's custom makefile, for that we pass a number of variables from CMake. + # Sets output to the chosen build folder, i.e. where the binaries and objects are generated. + # Also passes CUDA location from FindCUDA, sets c++ compiler to the same one CMake uses. add_custom_command(OUTPUT ${NCCL_STATIC} COMMAND make src.build BUILDDIR=${CMAKE_CURRENT_BINARY_DIR}/nccl @@ -166,9 +173,9 @@ endif(USE_STATIC_LIBS) CUDA8_GENCODE=${GENCODE} CXX=${CMAKE_CXX_COMPILER} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/3rd_party/nccl) - add_custom_target(nccl_target DEPENDS ${NCCL_STATIC}) + add_custom_target(nccl_target DEPENDS ${NCCL_LIB_STATIC}) add_library(nccl STATIC IMPORTED) - set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${NCCL_STATIC}) + set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${NCCL_LIB_STATIC}) add_dependencies(nccl nccl_target) set(EXT_LIBS ${EXT_LIBS} nccl) diff --git a/src/3rd_party/cnpy/cnpy.cpp b/src/3rd_party/cnpy/cnpy.cpp index be5e2bfd..9ad3c2fa 100755 --- a/src/3rd_party/cnpy/cnpy.cpp +++ b/src/3rd_party/cnpy/cnpy.cpp @@ -103,6 +103,9 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh word_size = atoi(str_ws.substr(0,loc2).c_str()); } +// make compiler happy, otherwise warns with "variable set but not used" +#define _unused(x) ((void)(x)) + void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset) { std::vector<char> footer(22); @@ -124,6 +127,13 @@ void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& globa assert(disk_start == 0); assert(nrecs_on_disk == nrecs); assert(comment_len == 0); + + // make compiler happy, otherwise warns with "variable set but not used" + // on the other hand it seems having the asserts in here is useful. + _unused(disk_no); + _unused(disk_start); + _unused(nrecs_on_disk); + _unused(comment_len); } cnpy::NpyArrayPtr load_the_npy_file(FILE* fp) { diff --git a/src/tensors/cpu/device.cpp b/src/tensors/cpu/device.cpp index c4710f5c..b0fdae92 100644 --- a/src/tensors/cpu/device.cpp +++ b/src/tensors/cpu/device.cpp @@ -15,6 +15,12 @@ Device::~Device() { size_ = 0; } +// allocate function for tensor reserve() below. +// Needed for AVX512, while not available on all compilers. It seems clang +// does not have aligned_alloc for all cstlib versions. If AVX512 is not used +// a simple malloc is probably fine. +// Should generate a runtime error otherwise as we have a check in the AVX512 +// functions which tests for alignment. #ifdef _WIN32 #define MALLOC(size) _aligned_alloc(size, alignment_) #elif __GNUC__ diff --git a/src/training/communicator_nccl.h b/src/training/communicator_nccl.h index 16c49bdb..bf1f07df 100755 --- a/src/training/communicator_nccl.h +++ b/src/training/communicator_nccl.h @@ -4,7 +4,8 @@ #include "3rd_party/threadpool.h" #include "tensors/gpu/cuda_helpers.h" -// Generated by NCCL make files in build/nccl/include +// Generated by NCCL make files in build/nccl/include; +// include dir has been set in CMake files. NCCL add version number etc. #include "nccl.h" #include <cuda_runtime.h> |