diff options
author | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2018-12-07 08:24:33 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2018-12-07 08:24:33 +0300 |
commit | 01ce3ed71b1f5491ed09fbf15916ca737c74b09a (patch) | |
tree | 1b84c282ed7b526d456134b2a3f30034858a8f07 | |
parent | ebb1cda730ef0d1a8a782dd608051f573fb9c955 (diff) |
detect cpu intrinsics
-rw-r--r-- | CMakeLists.txt | 41 | ||||
-rw-r--r-- | cmake/FindSSE.cmake | 148 |
2 files changed, 183 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ef58d12..0c0cbbd8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,34 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}") execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +set(INTRINSICS "") +if(BUILD_ARCH STREQUAL "native") + message(STATUS "Checking support for CPU intrinsics") + include(FindSSE) + if(SSE2_FOUND) + message(STATUS "SSE2 support found") + set(INTRINSICS "${INTRINSICS} -msse2") + endif(SSE2_FOUND) + if(SSE3_FOUND) + message(STATUS "SSE3 support found") + set(INTRINSICS "${INTRINSICS} -msse3") + endif(SSE3_FOUND) + if(SSE4_1_FOUND) + message(STATUS "SSE4.1 support found") + set(INTRINSICS "${INTRINSICS} -msse4.1") + endif(SSE4_1_FOUND) + if(AVX_FOUND) + message(STATUS "AVX support found") + set(INTRINSICS "${INTRINSICS} -mavx") + endif(AVX_FOUND) + if(AVX2_FOUND) + message(STATUS "AVX2 support found") + set(INTRINSICS "${INTRINSICS} -mavx2") + endif(AVX2_FOUND) +else() + set(INTRINSICS "-msse4.1") +endif() + # Set compilation flags if(MSVC) set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS") @@ -47,14 +75,15 @@ else() # These are used in src/CMakeLists.txt on a per-target basis list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function; - -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers) + -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers; + -Wno-unused-but-set-variable) # This warning does not exist prior to gcc 5.0 if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) list(APPEND ALL_WARNINGS -Wsuggest-override) endif() - set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} -msse4.1 -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}") + set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} ${INTRINSICS} -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -g -rdynamic") set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Werror -Wno-pragmas") set(CMAKE_CXX_FLAGS_SLIM "${CMAKE_CXX_FLAGS} -DNDEBUG") @@ -92,10 +121,10 @@ if(CUDA_FOUND) if(USE_STATIC_LIBS) find_library(CUDA_culibos_LIBRARY NAMES culibos PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64) set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - message("-- Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}") + message(STATUS "Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}") else(USE_STATIC_LIBS) set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - message("-- Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}") + message(STATUS "Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}") endif(USE_STATIC_LIBS) if(USE_CUDNN) @@ -223,9 +252,9 @@ set(BOOST_COMPONENTS timer iostreams filesystem system chrono) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) add_definitions(-DUSE_BOOST_REGEX=1) set(BOOST_COMPONENTS ${BOOST_COMPONENTS} regex) - message("-- Using boost::regex") + message(STATUS "Using boost::regex") else() - message("-- Using std::regex") + message(STATUS "Using std::regex") endif() if(COMPILE_SERVER) diff --git a/cmake/FindSSE.cmake b/cmake/FindSSE.cmake new file mode 100644 index 00000000..c152dd74 --- /dev/null +++ b/cmake/FindSSE.cmake @@ -0,0 +1,148 @@ +# Check if SSE/AVX instructions are available on the machine where +# the project is compiled. + +IF(CMAKE_SYSTEM_NAME MATCHES "Linux") + EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) + + STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + # /proc/cpuinfo apparently omits sse3 :( + STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE) + IF (NOT SSE3_TRUE) + STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE) + ENDIF (NOT SSE3_TRUE) + + STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE) + IF (SSE3_TRUE OR SSSE3_TRUE) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + ELSE (SSE3_TRUE OR SSSE3_TRUE) + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + ENDIF (SSE3_TRUE OR SSSE3_TRUE) + IF (SSSE3_TRUE) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + ELSE (SSSE3_TRUE) + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + ENDIF (SSSE3_TRUE) + + STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE) + IF (SSE41_TRUE) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + ELSE (SSE41_TRUE) + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + ENDIF (SSE41_TRUE) + + STRING(REGEX REPLACE "^.*(avx).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "avx" "${SSE_THERE}" AVX_TRUE) + IF (AVX_TRUE) + set(AVX_FOUND true CACHE BOOL "AVX available on host") + ELSE (AVX_TRUE) + set(AVX_FOUND false CACHE BOOL "AVX available on host") + ENDIF (AVX_TRUE) + + STRING(REGEX REPLACE "^.*(avx2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "avx2" "${SSE_THERE}" AVX2_TRUE) + IF (AVX2_TRUE) + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + ELSE (AVX2_TRUE) + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + ENDIF (AVX2_TRUE) + +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE + CPUINFO) + + STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE) + IF (SSE3_TRUE) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + ELSE (SSE3_TRUE) + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + ENDIF (SSE3_TRUE) + + STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE) + IF (SSSE3_TRUE) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + ELSE (SSSE3_TRUE) + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + ENDIF (SSSE3_TRUE) + + STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE) + IF (SSE41_TRUE) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + ELSE (SSE41_TRUE) + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + ENDIF (SSE41_TRUE) + + STRING(REGEX REPLACE "^.*(AVX).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "AVX" "${SSE_THERE}" AVX_TRUE) + IF (AVX_TRUE) + set(AVX_FOUND true CACHE BOOL "AVX available on host") + ELSE (AVX_TRUE) + set(AVX_FOUND false CACHE BOOL "AVX available on host") + ENDIF (AVX_TRUE) + + STRING(REGEX REPLACE "^.*(AVX2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "AVX2" "${SSE_THERE}" AVX2_TRUE) + IF (AVX2_TRUE) + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + ELSE (AVX2_TRUE) + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + ENDIF (AVX2_TRUE) + +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") + # TODO + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + set(AVX_FOUND false CACHE BOOL "AVX available on host") + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") +ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + set(AVX_FOUND false CACHE BOOL "AVX available on host") + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") +ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") + +if(NOT SSE2_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.") +endif(NOT SSE2_FOUND) +if(NOT SSE3_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.") +endif(NOT SSE3_FOUND) +if(NOT SSSE3_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.") +endif(NOT SSSE3_FOUND) +if(NOT SSE4_1_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.") +endif(NOT SSE4_1_FOUND) +if(NOT AVX_FOUND) + MESSAGE(STATUS "Could not find hardware support for AVX on this machine.") +endif(NOT AVX_FOUND) +if(NOT AVX2_FOUND) + MESSAGE(STATUS "Could not find hardware support for AVX2 on this machine.") +endif(NOT AVX2_FOUND) + +mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND, AVX_FOUND, AVX2_FOUND) |