github.com/marian-nmt/marian.git
author    Marcin Junczys-Dowmunt <marcinjd@microsoft.com>  2018-12-08 00:37:05 +0300
committer Marcin Junczys-Dowmunt <marcinjd@microsoft.com>  2018-12-08 00:37:05 +0300
commit    8b4b3dae1e4b1d1322c2ec677e051c85b4d788d7 (patch)
tree      60f702ae578d4138cfb06b1a8512a85ddb67a677
parent    ed25aa02fca90a1a748ac806064a5630c653183e (diff)

Address review comments

-rw-r--r--  CMakeLists.txt                    | 19
-rwxr-xr-x  src/3rd_party/cnpy/cnpy.cpp       | 10
-rw-r--r--  src/tensors/cpu/device.cpp        |  6
-rwxr-xr-x  src/training/communicator_nccl.h  |  3
4 files changed, 31 insertions, 7 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 498247e9..943e3e36 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,9 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
execute_process(COMMAND git submodule update --init --recursive --no-fetch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+# Detect supported CPU intrinsics for the current platform. This will
+# only be used with BUILD_ARCH=native. For an overridden BUILD_ARCH we
+# minimally use -msse4.1, which seems to work with MKL.
set(INTRINSICS "")
if(BUILD_ARCH STREQUAL "native")
message(STATUS "Checking support for CPU intrinsics")
@@ -75,8 +78,7 @@ else()
# These are used in src/CMakeLists.txt on a per-target basis
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
- -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;
- -Wno-unused-but-set-variable)
+ -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;)
# This warning does not exist prior to gcc 5.0
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
@@ -144,13 +146,16 @@ endif(USE_STATIC_LIBS)
list(APPEND CUDA_NVCC_FLAGS -DBOOST_PP_VARIADICS=0; )
endif()
+ # We compile NCCL ourselves using the NVidia Makefile rather than CMake; this requires passing a couple of
+ # parameters from CMake. This is also fairly untested, let's hope it does not explode.
+ # @TODO: Make sure it does not use pre-installed NCCL headers
if(USE_NCCL)
# define and set the include dir for the generated nccl.h header
set(NCCL_HEADER_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/nccl/include")
include_directories(${NCCL_HEADER_LOCATION})
# set the path for the generated static lib
- set(NCCL_STATIC "${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a")
+ set(NCCL_LIB_STATIC "${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_NCCL")
LIST(APPEND CUDA_NVCC_FLAGS -DUSE_NCCL; )
@@ -158,7 +163,9 @@ endif(USE_STATIC_LIBS)
# disables compilation for sm_30 to avoid ptxas warning... that's general Kepler support. But K80s are supported for instance by sm_35
set(GENCODE "-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61")
- # sets output to build, passes cuda location from FindCUDA, sets c++ compiler to the same one CMake uses.
+ # We build using NVidia's custom makefile; for that we pass a number of variables from CMake.
+ # Sets the output to the chosen build folder, i.e. where the binaries and objects are generated.
+ # Also passes the CUDA location from FindCUDA and sets the C++ compiler to the same one CMake uses.
add_custom_command(OUTPUT ${NCCL_STATIC}
COMMAND make src.build
BUILDDIR=${CMAKE_CURRENT_BINARY_DIR}/nccl
@@ -166,9 +173,9 @@ endif(USE_STATIC_LIBS)
CUDA8_GENCODE=${GENCODE}
CXX=${CMAKE_CXX_COMPILER}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/3rd_party/nccl)
- add_custom_target(nccl_target DEPENDS ${NCCL_STATIC})
+ add_custom_target(nccl_target DEPENDS ${NCCL_LIB_STATIC})
add_library(nccl STATIC IMPORTED)
- set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${NCCL_STATIC})
+ set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${NCCL_LIB_STATIC})
add_dependencies(nccl nccl_target)
set(EXT_LIBS ${EXT_LIBS} nccl)
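
For reference, a minimal standalone C++ check (my own sketch, not part of this commit; the file name is hypothetical) that prints which SIMD instruction sets the chosen compiler flags actually enable. The macros are the usual GCC/Clang predefined ones, so with BUILD_ARCH=native the output depends on the host CPU, while an overridden BUILD_ARCH should at least report SSE4.1:

// intrinsics_check.cpp - compile with the project's flags, e.g.
//   g++ -march=native intrinsics_check.cpp -o intrinsics_check
#include <iostream>

int main() {
#ifdef __SSE4_1__
  std::cout << "SSE4.1 enabled\n";
#endif
#ifdef __AVX__
  std::cout << "AVX enabled\n";
#endif
#ifdef __AVX2__
  std::cout << "AVX2 enabled\n";
#endif
#ifdef __AVX512F__
  std::cout << "AVX512F enabled\n";
#endif
  return 0;
}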
diff --git a/src/3rd_party/cnpy/cnpy.cpp b/src/3rd_party/cnpy/cnpy.cpp
index be5e2bfd..9ad3c2fa 100755
--- a/src/3rd_party/cnpy/cnpy.cpp
+++ b/src/3rd_party/cnpy/cnpy.cpp
@@ -103,6 +103,9 @@ void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& sh
word_size = atoi(str_ws.substr(0,loc2).c_str());
}
+// Make the compiler happy; otherwise it warns with "variable set but not used".
+#define _unused(x) ((void)(x))
+
void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset)
{
std::vector<char> footer(22);
@@ -124,6 +127,13 @@ void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& globa
assert(disk_start == 0);
assert(nrecs_on_disk == nrecs);
assert(comment_len == 0);
+
+ // Make the compiler happy; otherwise it warns with "variable set but not used".
+ // On the other hand, it seems having the asserts in here is useful.
+ _unused(disk_no);
+ _unused(disk_start);
+ _unused(nrecs_on_disk);
+ _unused(comment_len);
}
cnpy::NpyArrayPtr load_the_npy_file(FILE* fp) {
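
A self-contained sketch (my own example, not repository code) of the _unused pattern introduced above: a value that is read only inside assert() triggers a "set but not used" warning once NDEBUG compiles the asserts away, so it is explicitly cast to void while the assert stays useful in debug builds:

#include <cassert>
#include <cstdio>

#define _unused(x) ((void)(x))

int main() {
  std::FILE* fp = std::tmpfile();
  if(!fp)
    return 1;
  int written = std::fputs("header", fp);
  assert(written >= 0); // checked in debug builds only
  _unused(written);     // silences the unused-variable warning in release builds
  std::fclose(fp);
  return 0;
}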
diff --git a/src/tensors/cpu/device.cpp b/src/tensors/cpu/device.cpp
index c4710f5c..b0fdae92 100644
--- a/src/tensors/cpu/device.cpp
+++ b/src/tensors/cpu/device.cpp
@@ -15,6 +15,12 @@ Device::~Device() {
size_ = 0;
}
+// Aligned allocation function for the tensor reserve() below.
+// Needed for AVX512, but not available on all compilers. It seems clang
+// does not have aligned_alloc for all C standard library versions. If AVX512
+// is not used, a plain malloc is probably fine.
+// Otherwise this should generate a runtime error, as we have a check in the
+// AVX512 functions which tests for alignment.
#ifdef _WIN32
#define MALLOC(size) _aligned_alloc(size, alignment_)
#elif __GNUC__
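
As context for the comment above, a hedged sketch (mine, not the project's code) of a portable aligned-allocation helper: MSVC provides _aligned_malloc/_aligned_free, glibc/C11 provide aligned_alloc, and posix_memalign serves as a fallback where the C library lacks aligned_alloc (the clang case mentioned above). The helper names and the exact platform split are assumptions of this sketch, not the file's actual MALLOC macro:

#include <cstdlib>
#ifdef _WIN32
#include <malloc.h>
#endif

// Returns memory aligned to 'alignment' (a power of two), or nullptr on failure.
inline void* alignedMalloc(std::size_t alignment, std::size_t size) {
#ifdef _WIN32
  return _aligned_malloc(size, alignment);   // note the reversed argument order
#elif defined(__GLIBC__)
  return aligned_alloc(alignment, size);     // size should be a multiple of alignment
#else
  void* ptr = nullptr;
  return posix_memalign(&ptr, alignment, size) == 0 ? ptr : nullptr;
#endif
}

inline void alignedFree(void* ptr) {
#ifdef _WIN32
  _aligned_free(ptr);                        // must pair with _aligned_malloc
#else
  std::free(ptr);
#endif
}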
diff --git a/src/training/communicator_nccl.h b/src/training/communicator_nccl.h
index 16c49bdb..bf1f07df 100755
--- a/src/training/communicator_nccl.h
+++ b/src/training/communicator_nccl.h
@@ -4,7 +4,8 @@
#include "3rd_party/threadpool.h"
#include "tensors/gpu/cuda_helpers.h"
-// Generated by NCCL make files in build/nccl/include
+// Generated by NCCL make files in build/nccl/include;
+// include dir has been set in the CMake files. NCCL adds version numbers etc.
#include "nccl.h"
#include <cuda_runtime.h>
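
A small sanity-check sketch (my own, not part of the commit) for the generated header: with build/nccl/include on the include path as set up in CMakeLists.txt, nccl.h can be included directly and the version queried, assuming the bundled NCCL provides ncclGetVersion and the program is linked against the generated libnccl_static.a plus the CUDA runtime:

#include <cstdio>
#include "nccl.h"

int main() {
  int version = 0;
  if(ncclGetVersion(&version) != ncclSuccess) {
    std::printf("ncclGetVersion failed\n");
    return 1;
  }
  // NCCL_MAJOR/MINOR/PATCH come from the generated nccl.h mentioned above.
  std::printf("header: NCCL %d.%d.%d, runtime reports: %d\n",
              NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH, version);
  return 0;
}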