Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcin Junczys-Dowmunt <junczys@amu.edu.pl>2018-12-13 00:33:04 +0300
committerMarcin Junczys-Dowmunt <junczys@amu.edu.pl>2018-12-13 00:33:04 +0300
commit0ae07c5323e5fdba9efa2ae8faaeff9fb170f195 (patch)
tree670eb744ec0972469d15e386c8e0ae8fd6d7aea5
parentd602dda8e260878f7fe74567eb2189eb9c019acb (diff)
handle warnings in sse2 and avx code
-rw-r--r--CMakeLists.txt45
-rw-r--r--src/tensors/cpu/sharp/avx_gemm.cpp14
-rw-r--r--src/tensors/cpu/sharp/int_gemm.cpp6
-rwxr-xr-xsrc/tensors/tensor.h2
-rw-r--r--vs/BuildRelease.bat6
-rw-r--r--vs/CheckDeps.bat63
6 files changed, 53 insertions, 83 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7fc4b747..b77bde27 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,6 +32,28 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
execute_process(COMMAND git submodule update --init --recursive --no-fetch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+
+# Set compilation flags
+if(MSVC)
+# These are used in src/CMakeLists.txt on a per-target basis
+ list(APPEND ALL_WARNINGS /WX; /W4;)
+
+ # Disabled bogus warnings for CPU intrinsics:
+ # C4310: cast truncates constant value
+ # C4324: 'marian::cpu::int16::`anonymous-namespace'::ScatterPut': structure was padded due to alignment specifier
+ set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\"")
+
+ set(INTRINSICS "/arch:AVX512")
+
+ set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS ${DISABLE_GLOBALLY}")
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /NODEFAULTLIB:MSVCRT")
+ set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
+
+ find_library(SHLWAPI Shlwapi.lib)
+ set(EXT_LIBS ${EXT_LIBS} SHLWAPI)
+else()
# Detect support CPU instrinsics for the current platform. This will
# only by used with BUILD_ARCH=native. For overridden BUILD_ARCH we
@@ -64,24 +86,11 @@ else()
set(INTRINSICS "-msse4.1")
endif()
-# Set compilation flags
-if(MSVC)
- set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /W4")
- set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /W4 /Zi /MP /GL /DNDEBUG")
- set(CMAKE_CXX_FLAGS_DEBUG "/MTd /Od /Ob0 /RTC1 /Zi /D_DEBUG")
-
- set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /NODEFAULTLIB:MSVCRT")
-
- set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
-
- find_library(SHLWAPI Shlwapi.lib)
- set(EXT_LIBS ${EXT_LIBS} SHLWAPI)
-else()
- set(DISABLE_GLOBALLY "-Wno-unused-result")
+set(DISABLE_GLOBALLY "-Wno-unused-result")
- # These are used in src/CMakeLists.txt on a per-target basis
- list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
- -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;)
+# These are used in src/CMakeLists.txt on a per-target basis
+list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
+ -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;)
# This warning does not exist prior to gcc 5.0
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
@@ -90,7 +99,7 @@ else()
set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} ${INTRINSICS} -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -g -rdynamic")
- set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas")
+ set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Wno-pragmas")
set(CMAKE_CXX_FLAGS_SLIM "${CMAKE_CXX_FLAGS} -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -rdynamic")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg -g -rdynamic")
diff --git a/src/tensors/cpu/sharp/avx_gemm.cpp b/src/tensors/cpu/sharp/avx_gemm.cpp
index c65f91ce..c41b73eb 100644
--- a/src/tensors/cpu/sharp/avx_gemm.cpp
+++ b/src/tensors/cpu/sharp/avx_gemm.cpp
@@ -99,7 +99,8 @@ union IntAccess {
* _mm512_sra_epi32(sum, shift16));
*/
inline void Convert32Sum(__m512i &sum) {
- sum = _mm512_madd_epi16(sum, _mm512_set1_epi16(1));
+ short one = 1;
+ sum = _mm512_madd_epi16(sum, _mm512_set1_epi16(one));
}
// Two sum version.
@@ -114,7 +115,7 @@ inline ReducedPair Reduce16to32(__m512i sum1, __m512i sum2) {
_mm512_unpacklo_epi32(sum1, sum2));
// 1 2 1 2 1 2 1 2
__m256i halves = _mm256_add_epi32(_mm512_castsi512_si256(pack12),
- _mm512_extracti64x4_epi64(pack12, 1));
+ _mm512_extracti64x4_epi64(pack12, (short)1));
// 1 2 1 2
IntAccess a;
a.as_n = _mm_add_epi32(_mm256_castsi256_si128(halves),
@@ -144,7 +145,7 @@ inline __m128i Reduce32(__m512i sum1,
_mm512_unpacklo_epi64(pack12, pack34));
// Cut the register into halves and sum those. 1 2 3 4 1 2 3 4
__m256i halves = _mm256_add_epi32(_mm512_castsi512_si256(pack1234),
- _mm512_extracti64x4_epi64(pack1234, 1));
+ _mm512_extracti64x4_epi64(pack1234, (short)1));
// Again: cut the register into halves and sum those. 1 2 3 4
return _mm_add_epi32(_mm256_castsi256_si128(halves),
_mm256_extracti128_si256(halves, 1));
@@ -175,14 +176,14 @@ inline int32_t Reduce32(__m256i halves) {
inline int32_t Reduce32(__m512i sum1) {
// Fold register over itself.
return Reduce32(_mm256_add_epi32(_mm512_castsi512_si256(sum1),
- _mm512_extracti64x4_epi64(sum1, 1)));
+ _mm512_extracti64x4_epi64(sum1, (short)1)));
}
inline int32_t Reduce16to32(__m512i sum1) {
Convert32Sum(sum1);
// Fold register over itself.
return Reduce32(_mm256_add_epi32(_mm512_castsi512_si256(sum1),
- _mm512_extracti64x4_epi64(sum1, 1)));
+ _mm512_extracti64x4_epi64(sum1, (short)1)));
}
class ScatterPut {
@@ -204,7 +205,7 @@ public:
float_sums = _mm_mul_ps(float_sums, unquant_mult_sse_);
#ifdef __AVX512VL__
// The scatter instruction requires avx512vl
- _mm_i32scatter_ps(base, num_b_rows_scatter_, float_sums, 1);
+ _mm_i32scatter_ps(base, num_b_rows_scatter_, float_sums, (short)1);
#else
FloatAccess a;
// Get floats for each of the sums to write.
@@ -398,6 +399,7 @@ inline void Accum(const __m512i zeros,
// Choosing to approximate and do adds.
// Perhaps every so often we could accumulate by Convert32Sum
sum = _mm512_adds_epi16(sum, multiplied);
+ b; // make compiler happy
}
} // namespace
diff --git a/src/tensors/cpu/sharp/int_gemm.cpp b/src/tensors/cpu/sharp/int_gemm.cpp
index f85ae9a9..e04446bc 100644
--- a/src/tensors/cpu/sharp/int_gemm.cpp
+++ b/src/tensors/cpu/sharp/int_gemm.cpp
@@ -73,7 +73,7 @@ void Quantize8(marian::Tensor out,
const marian::Tensor in,
float clipValue) {
#ifdef __AVX512F__
- float quant_mult = 127.0 / clipValue;
+ float quant_mult = 127.0f / clipValue;
AVX_Quantize8(
in->data(), out->data<int8_t>(), quant_mult, in->shape().elements());
#else
@@ -165,8 +165,8 @@ void ProdInt8(marian::Tensor C,
#ifdef __AVX512F__
// This would be easy...
ABORT_IF(scale != 1, "Scale other than 1 not supported");
- float quant_mult = 127.0 / clipValue;
- float unquant_mult = 1.0 / (quant_mult * quant_mult);
+ float quant_mult = 127.0f / clipValue;
+ float unquant_mult = 1.0f / (quant_mult * quant_mult);
float* fC = C->data();
int num_A_rows = A->shape().elements() / A->shape()[-1];
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index 9721670b..acc7e54c 100755
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -87,7 +87,7 @@ public:
request<T>(),
type_);
- T temp;
+ T temp = 0;
if(backend_->getDeviceId().type == DeviceType::cpu) {
std::copy(data<T>() + i, data<T>() + i + 1, &temp);
}
diff --git a/vs/BuildRelease.bat b/vs/BuildRelease.bat
index 6ea74cab..31215a15 100644
--- a/vs/BuildRelease.bat
+++ b/vs/BuildRelease.bat
@@ -17,10 +17,6 @@ if "%BUILD_ROOT%"=="" set BUILD_ROOT=%ROOT%build
call CreateVSProjects.bat %BUILD_ROOT%
if errorlevel 1 exit /b 1
-set _CL_=/utf-8
-
-REM -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH%
-
-cmake --build %BUILD_ROOT% --config Release
+cmake --build %BUILD_ROOT% --config Release
exit /b 0 \ No newline at end of file
diff --git a/vs/CheckDeps.bat b/vs/CheckDeps.bat
index 4af2aa93..e36a5eff 100644
--- a/vs/CheckDeps.bat
+++ b/vs/CheckDeps.bat
@@ -108,30 +108,12 @@ set CMAKE_OPT=
::
echo.
echo ... CUDA
-REM if "%CUDA_PATH%"=="" (
-REM echo The CUDA_PATH environment variable is not defined: please make sure CUDA 8.0+ is installed.
-REM exit /b 1
-REM )
-REM if not exist "%CUDA_PATH%" (
-REM echo CUDA_PATH is set to a non existing path:
-REM echo %CUDA_PATH%
-REM echo Please make sure CUDA 8.0+ is properly installed.
-REM exit /b 1
-REM )
-REM if not exist "%CUDA_PATH%\include\cuda.h" (
-REM echo CUDA header files were not found in this folder:
-REM echo "%CUDA_PATH%"
-REM echo Please make sure CUDA 8.0+ is properly installed.
-REM exit /b 1
-REM )
-REM if not exist "%CUDA_PATH%\lib\x64\cuda.lib" (
-REM echo CUDA library files were not found in this folder:
-REM echo "%CUDA_PATH%"
-REM echo Please make sure CUDA 8.0+ is properly installed.
-REM exit /b 1
-REM )
-
-echo Found Cuda SDK in %CUDA_PATH%
+REM CUDA is optional (CPU-only build without it). cmd.exe only accepts "else" on the same line as the closing parenthesis.
+if "%CUDA_PATH%"=="" (
+ echo The CUDA_PATH environment variable is not defined: this will compile only the CPU version.
+) else (
+ echo Found Cuda SDK in %CUDA_PATH%
+)
:: -------------------------
:: The MKL setup does not set any environment variable to the installation path.
@@ -215,40 +197,21 @@ if "%OPENSSL_ROOT_DIR%"=="" (
set OPENSSL_ROOT_DIR=%VCPKG_INSTALL%
)
-REM if not exist "%OPENSSL_ROOT_DIR%" (
-REM echo OPENSSL_ROOT_DIR is set to a non existing path:
-REM echo "%OPENSSL_ROOT_DIR%"
-REM echo Please set OPENSSL_ROOT_DIR to the installation path of the OpenSLL library.
-REM exit /b 1
-REM )
-REM if not exist "%OPENSSL_ROOT_DIR%\include\openssl\opensslv.h" (
-REM echo OpenSSL header files were not found in this folder:
-REM echo "%OPENSSL_ROOT_DIR%"
-REM echo Please make sure OpenSSL is correctly installed.
-REM exit /b 1
-REM )
-REM if not exist "%OPENSSL_ROOT_DIR%\lib\ssleay32.lib" (
-REM echo OpenSSL library file were not found in this folder:
-REM echo "%OPENSSL_ROOT_DIR%"
-REM echo Please make sure OpenSSL is correctly installed.
-REM exit /b 1
-)
-
-echo Found OpenSSL library in "%OPENSSL_ROOT_DIR%"
-
-set _CL_=/utf-8
-set LIBRARY_PATH=%CURRENT_PATH%\deps\proto
-
+if not exist "%VCPKG_INSTALL%/bin/protoc.exe" (
mkdir build
cd build
git clone https://github.com/protocolbuffers/protobuf
cd protobuf
-git checkout v.3.6.1
+git checkout v3.6.1
cd cmake
-cmake . -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH%
+cmake . -A x64 -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=%VCPKG_INSTALL%
cmake --build . --config Release --target install
cd ..\..\..
+)
+
+set CMAKE_PREFIX_PATH=%VCPKG_INSTALL%
+
echo.
echo.
echo --------------------------------------------------