Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <marcinjd@microsoft.com> 2018-12-07 08:24:33 +0300
committer: Marcin Junczys-Dowmunt <marcinjd@microsoft.com> 2018-12-07 08:24:33 +0300
commit: 01ce3ed71b1f5491ed09fbf15916ca737c74b09a (patch)
tree: 1b84c282ed7b526d456134b2a3f30034858a8f07
parent: ebb1cda730ef0d1a8a782dd608051f573fb9c955 (diff)
detect cpu intrinsics
-rw-r--r-- CMakeLists.txt 41
-rw-r--r-- cmake/FindSSE.cmake 148
2 files changed, 183 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ef58d12..0c0cbbd8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,34 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
execute_process(COMMAND git submodule update --init --recursive --no-fetch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+# Build the list of -m<isa> switches that is spliced into the non-MSVC
+# CMAKE_CXX_FLAGS as ${INTRINSICS}.
+if(NOT BUILD_ARCH STREQUAL "native")
+  # Any BUILD_ARCH other than "native": use the fixed SSE4.1 baseline.
+  set(INTRINSICS "-msse4.1")
+else()
+  # BUILD_ARCH is "native": include FindSSE and add one switch per
+  # instruction set whose *_FOUND variable is true.
+  set(INTRINSICS "")
+  message(STATUS "Checking support for CPU intrinsics")
+  include(FindSSE)
+  if(SSE2_FOUND)
+    message(STATUS "SSE2 support found")
+    set(INTRINSICS "${INTRINSICS} -msse2")
+  endif()
+  if(SSE3_FOUND)
+    message(STATUS "SSE3 support found")
+    set(INTRINSICS "${INTRINSICS} -msse3")
+  endif()
+  if(SSE4_1_FOUND)
+    message(STATUS "SSE4.1 support found")
+    set(INTRINSICS "${INTRINSICS} -msse4.1")
+  endif()
+  if(AVX_FOUND)
+    message(STATUS "AVX support found")
+    set(INTRINSICS "${INTRINSICS} -mavx")
+  endif()
+  if(AVX2_FOUND)
+    message(STATUS "AVX2 support found")
+    set(INTRINSICS "${INTRINSICS} -mavx2")
+  endif()
+endif()
+
# Set compilation flags
if(MSVC)
set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS")
@@ -47,14 +75,15 @@ else()
# These are used in src/CMakeLists.txt on a per-target basis
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
- -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers)
+ -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;
+ -Wno-unused-but-set-variable)
# This warning does not exist prior to gcc 5.0
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
list(APPEND ALL_WARNINGS -Wsuggest-override)
endif()
- set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} -msse4.1 -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}")
+ set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} ${INTRINSICS} -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -g -rdynamic")
set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas")
set(CMAKE_CXX_FLAGS_SLIM "${CMAKE_CXX_FLAGS} -DNDEBUG")
@@ -92,10 +121,10 @@ if(CUDA_FOUND)
if(USE_STATIC_LIBS)
find_library(CUDA_culibos_LIBRARY NAMES culibos PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
- message("-- Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}")
+ message(STATUS "Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}")
else(USE_STATIC_LIBS)
set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
- message("-- Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}")
+ message(STATUS "Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}")
endif(USE_STATIC_LIBS)
if(USE_CUDNN)
@@ -223,9 +252,9 @@ set(BOOST_COMPONENTS timer iostreams filesystem system chrono)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
add_definitions(-DUSE_BOOST_REGEX=1)
set(BOOST_COMPONENTS ${BOOST_COMPONENTS} regex)
- message("-- Using boost::regex")
+ message(STATUS "Using boost::regex")
else()
- message("-- Using std::regex")
+ message(STATUS "Using std::regex")
endif()
if(COMPILE_SERVER)
diff --git a/cmake/FindSSE.cmake b/cmake/FindSSE.cmake
new file mode 100644
index 00000000..c152dd74
--- /dev/null
+++ b/cmake/FindSSE.cmake
@@ -0,0 +1,148 @@
+# Check if SSE/AVX instructions are available on the machine where
+# the project is compiled.
+
+# Probe the build host for SSE/AVX support and publish the result in the
+# cache entries SSE2_FOUND, SSE3_FOUND, SSSE3_FOUND, SSE4_1_FOUND, AVX_FOUND
+# and AVX2_FOUND. The entries are written without FORCE, so a value that is
+# already in the cache (earlier configure run or -D on the command line)
+# is kept as-is.
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+  # Pre-set CPUINFO so the matches below see an empty string, not an
+  # undefined variable, if /proc/cpuinfo cannot be read.
+  set(CPUINFO "")
+  execute_process(COMMAND cat /proc/cpuinfo
+                  OUTPUT_VARIABLE CPUINFO
+                  ERROR_QUIET)
+
+  if(CPUINFO MATCHES "sse2")
+    set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+  else()
+    set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
+  endif()
+
+  # The Linux kernel lists SSE3 under the flag name "pni"; match it as a
+  # whole token. The literal "sse3" and "T2300" probes are kept from the
+  # previous implementation, and SSSE3 is still treated as implying SSE3.
+  if(CPUINFO MATCHES "(^|[^a-z0-9_])pni([^a-z0-9_]|$)"
+     OR CPUINFO MATCHES "[^s]sse3"
+     OR CPUINFO MATCHES "T2300"
+     OR CPUINFO MATCHES "ssse3")
+    set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
+  else()
+    set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+  endif()
+
+  if(CPUINFO MATCHES "ssse3")
+    set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
+  else()
+    set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+  endif()
+
+  if(CPUINFO MATCHES "sse4_1")
+    set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
+  else()
+    set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+  endif()
+
+  # "avx" also matches inside "avx2"; any flag containing it counts as AVX.
+  if(CPUINFO MATCHES "avx")
+    set(AVX_FOUND true CACHE BOOL "AVX available on host")
+  else()
+    set(AVX_FOUND false CACHE BOOL "AVX available on host")
+  endif()
+
+  if(CPUINFO MATCHES "avx2")
+    set(AVX2_FOUND true CACHE BOOL "AVX2 available on host")
+  else()
+    set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
+  endif()
+
+elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  # machdep.cpu.features carries the SSE*/AVX names; AVX2 is listed under
+  # machdep.cpu.leaf7_features, so both keys are queried. stderr is dropped
+  # so a missing key cannot end up in CPUINFO.
+  set(CPUINFO "")
+  execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features machdep.cpu.leaf7_features
+                  OUTPUT_VARIABLE CPUINFO
+                  ERROR_QUIET)
+
+  # [^S] keeps "SSSE2"/"SSSE3"-style names from satisfying the SSE2/SSE3 probes.
+  if(CPUINFO MATCHES "[^S]SSE2")
+    set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+  else()
+    set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
+  endif()
+
+  if(CPUINFO MATCHES "[^S]SSE3")
+    set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
+  else()
+    set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+  endif()
+
+  if(CPUINFO MATCHES "SSSE3")
+    set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
+  else()
+    set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+  endif()
+
+  # The dot is escaped so only a literal "SSE4.1" matches.
+  if(CPUINFO MATCHES "SSE4\\.1")
+    set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
+  else()
+    set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+  endif()
+
+  if(CPUINFO MATCHES "AVX")
+    set(AVX_FOUND true CACHE BOOL "AVX available on host")
+  else()
+    set(AVX_FOUND false CACHE BOOL "AVX available on host")
+  endif()
+
+  if(CPUINFO MATCHES "AVX2")
+    set(AVX2_FOUND true CACHE BOOL "AVX2 available on host")
+  else()
+    set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
+  endif()
+
+elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
+  # TODO: no detection implemented; only SSE2 is reported as available.
+  set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+  set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+  set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+  set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+  set(AVX_FOUND false CACHE BOOL "AVX available on host")
+  set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
+else()
+  # Any other system: same fixed defaults as the Windows branch.
+  set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+  set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+  set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+  set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+  set(AVX_FOUND false CACHE BOOL "AVX available on host")
+  set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
+endif()
+
+# Emit one STATUS line for every instruction set whose *_FOUND variable is
+# false.
+if(NOT SSE2_FOUND)
+  message(STATUS "Could not find hardware support for SSE2 on this machine.")
+endif()
+if(NOT SSE3_FOUND)
+  message(STATUS "Could not find hardware support for SSE3 on this machine.")
+endif()
+if(NOT SSSE3_FOUND)
+  message(STATUS "Could not find hardware support for SSSE3 on this machine.")
+endif()
+if(NOT SSE4_1_FOUND)
+  message(STATUS "Could not find hardware support for SSE4.1 on this machine.")
+endif()
+if(NOT AVX_FOUND)
+  message(STATUS "Could not find hardware support for AVX on this machine.")
+endif()
+if(NOT AVX2_FOUND)
+  message(STATUS "Could not find hardware support for AVX2 on this machine.")
+endif()
+
+# Mark the detection results as advanced cache entries. CMake arguments are
+# separated by whitespace only: a comma would become part of the variable
+# name, so the intended entry would not be marked.
+mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND AVX_FOUND AVX2_FOUND)