diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2018-12-13 00:33:04 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2018-12-13 00:33:04 +0300 |
commit | 0ae07c5323e5fdba9efa2ae8faaeff9fb170f195 (patch) | |
tree | 670eb744ec0972469d15e386c8e0ae8fd6d7aea5 | |
parent | d602dda8e260878f7fe74567eb2189eb9c019acb (diff) |
handle warnings in sse2 and avx code
-rw-r--r-- | CMakeLists.txt | 45 | ||||
-rw-r--r-- | src/tensors/cpu/sharp/avx_gemm.cpp | 14 | ||||
-rw-r--r-- | src/tensors/cpu/sharp/int_gemm.cpp | 6 | ||||
-rwxr-xr-x | src/tensors/tensor.h | 2 | ||||
-rw-r--r-- | vs/BuildRelease.bat | 6 | ||||
-rw-r--r-- | vs/CheckDeps.bat | 63 |
6 files changed, 53 insertions, 83 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fc4b747..b77bde27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,6 +32,28 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}") execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +# Set compilation flags +if(MSVC) +# These are used in src/CMakeLists.txt on a per-target basis + list(APPEND ALL_WARNINGS /WX; /W4;) + + # Disabled bogus warnings for CPU intrincics: + # C4310: cast truncates constant value + # C4324: 'marian::cpu::int16::`anonymous-namespace'::ScatterPut': structure was padded due to alignment specifier + set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\"") + + set(INTRINSICS "/arch:AVX512") + + set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS ${DISABLE_GLOBALLY}") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /NODEFAULTLIB:MSVCRT") + set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental") + + find_library(SHLWAPI Shlwapi.lib) + set(EXT_LIBS ${EXT_LIBS} SHLWAPI) +else() # Detect support CPU instrinsics for the current platform. This will # only by used with BUILD_ARCH=native. For overridden BUILD_ARCH we @@ -64,24 +86,11 @@ else() set(INTRINSICS "-msse4.1") endif() -# Set compilation flags -if(MSVC) - set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /W4") - set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /W4 /Zi /MP /GL /DNDEBUG") - set(CMAKE_CXX_FLAGS_DEBUG "/MTd /Od /Ob0 /RTC1 /Zi /D_DEBUG") - - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /NODEFAULTLIB:MSVCRT") - - set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental") - - find_library(SHLWAPI Shlwapi.lib) - set(EXT_LIBS ${EXT_LIBS} SHLWAPI) -else() - set(DISABLE_GLOBALLY "-Wno-unused-result") +set(DISABLE_GLOBALLY "-Wno-unused-result") - # These are used in src/CMakeLists.txt on a per-target basis - list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function; - -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;) +# These are used in src/CMakeLists.txt on a per-target basis +list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function; + -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;) # This warning does not exist prior to gcc 5.0 if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) @@ -90,7 +99,7 @@ else() set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} ${INTRINSICS} -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -g -rdynamic") - set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas") + set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Wno-pragmas") set(CMAKE_CXX_FLAGS_SLIM "${CMAKE_CXX_FLAGS} -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -rdynamic") set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg -g -rdynamic") diff --git a/src/tensors/cpu/sharp/avx_gemm.cpp b/src/tensors/cpu/sharp/avx_gemm.cpp index c65f91ce..c41b73eb 100644 --- a/src/tensors/cpu/sharp/avx_gemm.cpp +++ b/src/tensors/cpu/sharp/avx_gemm.cpp @@ -99,7 +99,8 @@ union IntAccess { * _mm512_sra_epi32(sum, shift16)); */ inline void Convert32Sum(__m512i &sum) { - sum = _mm512_madd_epi16(sum, _mm512_set1_epi16(1)); + short one = 1; + sum = _mm512_madd_epi16(sum, _mm512_set1_epi16(one)); } // Two sum version. @@ -114,7 +115,7 @@ inline ReducedPair Reduce16to32(__m512i sum1, __m512i sum2) { _mm512_unpacklo_epi32(sum1, sum2)); // 1 2 1 2 1 2 1 2 __m256i halves = _mm256_add_epi32(_mm512_castsi512_si256(pack12), - _mm512_extracti64x4_epi64(pack12, 1)); + _mm512_extracti64x4_epi64(pack12, (short)1)); // 1 2 1 2 IntAccess a; a.as_n = _mm_add_epi32(_mm256_castsi256_si128(halves), @@ -144,7 +145,7 @@ inline __m128i Reduce32(__m512i sum1, _mm512_unpacklo_epi64(pack12, pack34)); // Cut the register into halves and sum those. 1 2 3 4 1 2 3 4 __m256i halves = _mm256_add_epi32(_mm512_castsi512_si256(pack1234), - _mm512_extracti64x4_epi64(pack1234, 1)); + _mm512_extracti64x4_epi64(pack1234, (short)1)); // Again: cut the register into halves and sum those. 1 2 3 4 return _mm_add_epi32(_mm256_castsi256_si128(halves), _mm256_extracti128_si256(halves, 1)); @@ -175,14 +176,14 @@ inline int32_t Reduce32(__m256i halves) { inline int32_t Reduce32(__m512i sum1) { // Fold register over itself. return Reduce32(_mm256_add_epi32(_mm512_castsi512_si256(sum1), - _mm512_extracti64x4_epi64(sum1, 1))); + _mm512_extracti64x4_epi64(sum1, (short)1))); } inline int32_t Reduce16to32(__m512i sum1) { Convert32Sum(sum1); // Fold register over itself. return Reduce32(_mm256_add_epi32(_mm512_castsi512_si256(sum1), - _mm512_extracti64x4_epi64(sum1, 1))); + _mm512_extracti64x4_epi64(sum1, (short)1))); } class ScatterPut { @@ -204,7 +205,7 @@ public: float_sums = _mm_mul_ps(float_sums, unquant_mult_sse_); #ifdef __AVX512VL__ // The scatter instruction requires avx512vl - _mm_i32scatter_ps(base, num_b_rows_scatter_, float_sums, 1); + _mm_i32scatter_ps(base, num_b_rows_scatter_, float_sums, (short)1); #else FloatAccess a; // Get floats for each of the sums to write. @@ -398,6 +399,7 @@ inline void Accum(const __m512i zeros, // Choosing to approximate and do adds. // Perhaps every so often we could accumulate by Convert32Sum sum = _mm512_adds_epi16(sum, multiplied); + b; // make compiler happy } } // namespace diff --git a/src/tensors/cpu/sharp/int_gemm.cpp b/src/tensors/cpu/sharp/int_gemm.cpp index f85ae9a9..e04446bc 100644 --- a/src/tensors/cpu/sharp/int_gemm.cpp +++ b/src/tensors/cpu/sharp/int_gemm.cpp @@ -73,7 +73,7 @@ void Quantize8(marian::Tensor out, const marian::Tensor in, float clipValue) { #ifdef __AVX512F__ - float quant_mult = 127.0 / clipValue; + float quant_mult = 127.0f / clipValue; AVX_Quantize8( in->data(), out->data<int8_t>(), quant_mult, in->shape().elements()); #else @@ -165,8 +165,8 @@ void ProdInt8(marian::Tensor C, #ifdef __AVX512F__ // This would be easy... ABORT_IF(scale != 1, "Scale other than 1 not supported"); - float quant_mult = 127.0 / clipValue; - float unquant_mult = 1.0 / (quant_mult * quant_mult); + float quant_mult = 127.0f / clipValue; + float unquant_mult = 1.0f / (quant_mult * quant_mult); float* fC = C->data(); int num_A_rows = A->shape().elements() / A->shape()[-1]; diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h index 9721670b..acc7e54c 100755 --- a/src/tensors/tensor.h +++ b/src/tensors/tensor.h @@ -87,7 +87,7 @@ public: request<T>(), type_); - T temp; + T temp = 0; if(backend_->getDeviceId().type == DeviceType::cpu) { std::copy(data<T>() + i, data<T>() + i + 1, &temp); } diff --git a/vs/BuildRelease.bat b/vs/BuildRelease.bat index 6ea74cab..31215a15 100644 --- a/vs/BuildRelease.bat +++ b/vs/BuildRelease.bat @@ -17,10 +17,6 @@ if "%BUILD_ROOT%"=="" set BUILD_ROOT=%ROOT%build call CreateVSProjects.bat %BUILD_ROOT%
if errorlevel 1 exit /b 1
-set _CL_=/utf-8
-
-REM -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH%
-
-cmake --build %BUILD_ROOT% --config Release
+cmake --build %BUILD_ROOT% --config Release
exit /b 0
\ No newline at end of file diff --git a/vs/CheckDeps.bat b/vs/CheckDeps.bat index 4af2aa93..e36a5eff 100644 --- a/vs/CheckDeps.bat +++ b/vs/CheckDeps.bat @@ -108,30 +108,12 @@ set CMAKE_OPT= ::
echo.
echo ... CUDA
-REM if "%CUDA_PATH%"=="" (
-REM echo The CUDA_PATH environment variable is not defined: please make sure CUDA 8.0+ is installed.
-REM exit /b 1
-REM )
-REM if not exist "%CUDA_PATH%" (
-REM echo CUDA_PATH is set to a non existing path:
-REM echo %CUDA_PATH%
-REM echo Please make sure CUDA 8.0+ is properly installed.
-REM exit /b 1
-REM )
-REM if not exist "%CUDA_PATH%\include\cuda.h" (
-REM echo CUDA header files were not found in this folder:
-REM echo "%CUDA_PATH%"
-REM echo Please make sure CUDA 8.0+ is properly installed.
-REM exit /b 1
-REM )
-REM if not exist "%CUDA_PATH%\lib\x64\cuda.lib" (
-REM echo CUDA library files were not found in this folder:
-REM echo "%CUDA_PATH%"
-REM echo Please make sure CUDA 8.0+ is properly installed.
-REM exit /b 1
-REM )
-
-echo Found Cuda SDK in %CUDA_PATH%
+if "%CUDA_PATH%"=="" (
+ echo The CUDA_PATH environment variable is not defined: this will compile only the CPU version.
+)
+else (
+ echo Found Cuda SDK in %CUDA_PATH%
+)
:: -------------------------
:: The MKL setup does not set any environment variable to the installation path.
@@ -215,40 +197,21 @@ if "%OPENSSL_ROOT_DIR%"=="" ( set OPENSSL_ROOT_DIR=%VCPKG_INSTALL%
)
-REM if not exist "%OPENSSL_ROOT_DIR%" (
-REM echo OPENSSL_ROOT_DIR is set to a non existing path:
-REM echo "%OPENSSL_ROOT_DIR%"
-REM echo Please set OPENSSL_ROOT_DIR to the installation path of the OpenSLL library.
-REM exit /b 1
-REM )
-REM if not exist "%OPENSSL_ROOT_DIR%\include\openssl\opensslv.h" (
-REM echo OpenSSL header files were not found in this folder:
-REM echo "%OPENSSL_ROOT_DIR%"
-REM echo Please make sure OpenSSL is correctly installed.
-REM exit /b 1
-REM )
-REM if not exist "%OPENSSL_ROOT_DIR%\lib\ssleay32.lib" (
-REM echo OpenSSL library file were not found in this folder:
-REM echo "%OPENSSL_ROOT_DIR%"
-REM echo Please make sure OpenSSL is correctly installed.
-REM exit /b 1
-)
-
-echo Found OpenSSL library in "%OPENSSL_ROOT_DIR%"
-
-set _CL_=/utf-8
-set LIBRARY_PATH=%CURRENT_PATH%\deps\proto
-
+if not exist "%VCPKG_INSTALL%/bin/protoc.exe" (
mkdir build
cd build
git clone https://github.com/protocolbuffers/protobuf
cd protobuf
-git checkout v.3.6.1
+git checkout v3.6.1
cd cmake
-cmake . -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH%
+cmake . -A x64 -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=%VCPKG_INSTALL%
cmake --build . --config Release --target install
cd ..\..\..
+)
+
+set CMAKE_PREFIX_PATH=%VCPKG_INSTALL%
+
echo.
echo.
echo --------------------------------------------------
|