1
0
Fork 0

Updated Matthias Kretz's OptimizeForArchitecture

This commit is contained in:
Kaveh Vahedipour 2016-11-17 10:37:52 +01:00
parent 764615cc62
commit 8f4fdc6114
1 changed files with 210 additions and 165 deletions

View File

@ -1,5 +1,23 @@
# Determine the host CPU feature set and determine the best set of compiler
# flags to enable all supported SIMD relevant features. Alternatively, the
# target CPU can be explicitly selected (for generating more generic binaries
# or for targeting a different system).
# Compilers provide e.g. the -march=native flag to achieve a similar result.
# This fails to address the need for building for a different microarchitecture
# than the current host.
# The script tries to deduce all settings from the model and family numbers of
# the CPU instead of reading the CPUID flags from e.g. /proc/cpuinfo. This makes
# the detection more independent from the CPUID code in the kernel (e.g. avx2 is
# not listed on older kernels).
#
# Usage:
# OptimizeForArchitecture()
# If either of Vc_SSE_INTRINSICS_BROKEN, Vc_AVX_INTRINSICS_BROKEN,
# Vc_AVX2_INTRINSICS_BROKEN is defined and set, the OptimizeForArchitecture
# macro will consequently disable the relevant features via compiler flags.
#=============================================================================
# Copyright 2010-2013 Matthias Kretz <kretz@kde.org>
# Copyright 2010-2015 Matthias Kretz <kretz@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@ -7,15 +25,12 @@
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * The names of Kitware, Inc., the Insight Consortium, or the names of
# any consortium members, or of any contributors, may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.
# * Neither the names of contributing organizations nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -31,7 +46,7 @@
get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
include("${_currentDir}/AddCompilerFlag.cmake")
include(CheckIncludeFile)
include(CheckIncludeFileCXX)
macro(_my_find _list _value _ret)
list(FIND ${_list} "${_value}" _found)
@ -70,47 +85,58 @@ macro(AutodetectHostArchitecture)
endif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
if(_vendor_id STREQUAL "GenuineIntel")
if(_cpu_family EQUAL 6)
# Any recent Intel CPU except NetBurst
if(_cpu_model EQUAL 70)
set(TARGET_ARCHITECTURE "haswell")
elseif(_cpu_model EQUAL 63)
set(TARGET_ARCHITECTURE "haswell")
elseif(_cpu_model EQUAL 62)
set(TARGET_ARCHITECTURE "ivy-bridge")
elseif(_cpu_model EQUAL 61)
# taken from the Intel ORM
# http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html
# CPUID Signature Values of Recent Intel Microarchitectures
# 4E 5E | Skylake microarchitecture
# 3D 47 56 | Broadwell microarchitecture
# 3C 45 46 3F | Haswell microarchitecture
# 3A 3E | Ivy Bridge microarchitecture
# 2A 2D | Sandy Bridge microarchitecture
# 25 2C 2F | Intel microarchitecture Westmere
# 1A 1E 1F 2E | Intel microarchitecture Nehalem
# 17 1D | Enhanced Intel Core microarchitecture
# 0F | Intel Core microarchitecture
#
# Values from the Intel SDE:
# 5C | Goldmont
# 5A | Silvermont
# 57 | Knights Landing
# 66 | Cannonlake
# 55 | Skylake Server
# 4E | Skylake Client
# 3C | Broadwell (likely a bug in the SDE)
# 3C | Haswell
if(_cpu_model EQUAL 87)
set(TARGET_ARCHITECTURE "knl") # Knights Landing
elseif(_cpu_model EQUAL 92)
set(TARGET_ARCHITECTURE "goldmont")
elseif(_cpu_model EQUAL 90)
set(TARGET_ARCHITECTURE "silvermont")
elseif(_cpu_model EQUAL 102)
set(TARGET_ARCHITECTURE "cannonlake")
elseif(_cpu_model EQUAL 85) # 55
set(TARGET_ARCHITECTURE "skylake-avx512")
elseif(_cpu_model EQUAL 78 OR _cpu_model EQUAL 94) # 4E, 5E
set(TARGET_ARCHITECTURE "skylake")
elseif(_cpu_model EQUAL 61 OR _cpu_model EQUAL 71 OR _cpu_model EQUAL 86)
set(TARGET_ARCHITECTURE "broadwell")
elseif(_cpu_model EQUAL 60)
elseif(_cpu_model EQUAL 60 OR _cpu_model EQUAL 69 OR _cpu_model EQUAL 70 OR _cpu_model EQUAL 63)
set(TARGET_ARCHITECTURE "haswell")
elseif(_cpu_model EQUAL 60)
set(TARGET_ARCHITECTURE "haswell")
elseif(_cpu_model EQUAL 58)
elseif(_cpu_model EQUAL 58 OR _cpu_model EQUAL 62)
set(TARGET_ARCHITECTURE "ivy-bridge")
elseif(_cpu_model EQUAL 47) # Xeon E7 4860
set(TARGET_ARCHITECTURE "westmere")
elseif(_cpu_model EQUAL 46) # Xeon 7500 series
set(TARGET_ARCHITECTURE "westmere")
elseif(_cpu_model EQUAL 45) # Xeon TNG
elseif(_cpu_model EQUAL 42 OR _cpu_model EQUAL 45)
set(TARGET_ARCHITECTURE "sandy-bridge")
elseif(_cpu_model EQUAL 44) # Xeon 5600 series
elseif(_cpu_model EQUAL 37 OR _cpu_model EQUAL 44 OR _cpu_model EQUAL 47)
set(TARGET_ARCHITECTURE "westmere")
elseif(_cpu_model EQUAL 42) # Core TNG
set(TARGET_ARCHITECTURE "sandy-bridge")
elseif(_cpu_model EQUAL 37) # Core i7/i5/i3
set(TARGET_ARCHITECTURE "westmere")
elseif(_cpu_model EQUAL 31) # Core i7/i5
set(TARGET_ARCHITECTURE "westmere")
elseif(_cpu_model EQUAL 30) # Core i7/i5
set(TARGET_ARCHITECTURE "westmere")
elseif(_cpu_model EQUAL 29)
set(TARGET_ARCHITECTURE "penryn")
elseif(_cpu_model EQUAL 28)
set(TARGET_ARCHITECTURE "atom")
elseif(_cpu_model EQUAL 26)
elseif(_cpu_model EQUAL 26 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 31 OR _cpu_model EQUAL 46)
set(TARGET_ARCHITECTURE "nehalem")
elseif(_cpu_model EQUAL 23)
elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29)
set(TARGET_ARCHITECTURE "penryn")
elseif(_cpu_model EQUAL 15)
set(TARGET_ARCHITECTURE "merom")
elseif(_cpu_model EQUAL 28)
set(TARGET_ARCHITECTURE "atom")
elseif(_cpu_model EQUAL 14)
set(TARGET_ARCHITECTURE "core")
elseif(_cpu_model LESS 14)
@ -152,7 +178,15 @@ macro(AutodetectHostArchitecture)
endmacro()
macro(OptimizeForArchitecture)
set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"haswell\", \"broadwell\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"AMD 14h\", \"AMD 16h\".")
set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. \
Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used. \
Setting the value to \"auto\" will try to optimize for the architecture where cmake is called. \
Other supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \
\"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \
\"haswell\", \"broadwell\", \"skylake\", \"skylake-avx512\", \"cannonlake\", \"silvermont\", \
\"goldmont\", \"knl\" (Knights Landing), \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \
\"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \
\"AMD 14h\", \"AMD 16h\".")
set(_force)
if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
@ -170,6 +204,58 @@ macro(OptimizeForArchitecture)
message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
endif(TARGET_ARCHITECTURE STREQUAL "auto")
# Nehalem: appends "nehalem", "corei7" and "core2" (in that order) to
# _march_flag_list and the SSE .. SSE4.2 names to _available_vector_units_list.
# As a macro it modifies the caller's variables directly.
macro(_nehalem)
  list(APPEND _march_flag_list "nehalem" "corei7" "core2")
  list(APPEND _available_vector_units_list
    "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
endmacro()
# Westmere: appends its own "westmere" entry to _march_flag_list first, then
# everything _nehalem() contributes.
macro(_westmere)
  list(APPEND _march_flag_list
    "westmere")
  _nehalem()
endmacro()
# Sandy Bridge: appends "sandybridge" and "corei7-avx" to _march_flag_list,
# pulls in the Westmere entries, then appends the SSE .. SSE4.2 names together
# with "avx" to _available_vector_units_list.
macro(_sandybridge)
  list(APPEND _march_flag_list "sandybridge" "corei7-avx")
  _westmere()
  # The SSE names were already appended by the inherited macros; they are
  # appended again here exactly as before.
  list(APPEND _available_vector_units_list
    "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
endmacro()
# Ivy Bridge: appends "ivybridge" and "core-avx-i" to _march_flag_list, pulls
# in the Sandy Bridge entries, then adds "rdrnd" and "f16c" to
# _available_vector_units_list.
macro(_ivybridge)
  list(APPEND _march_flag_list "ivybridge" "core-avx-i")
  _sandybridge()
  list(APPEND _available_vector_units_list
    "rdrnd"
    "f16c")
endmacro()
# Haswell: appends "haswell" and "core-avx2" to _march_flag_list, pulls in the
# Ivy Bridge entries, then adds "avx2", "fma", "bmi" and "bmi2" to
# _available_vector_units_list.
macro(_haswell)
  list(APPEND _march_flag_list "haswell" "core-avx2")
  _ivybridge()
  list(APPEND _available_vector_units_list
    "avx2"
    "fma"
    "bmi"
    "bmi2")
endmacro()
# Broadwell: appends "broadwell" to _march_flag_list, then everything
# _haswell() contributes. No vector units are added beyond Haswell's.
macro(_broadwell)
  list(APPEND _march_flag_list
    "broadwell")
  _haswell()
endmacro()
# Skylake (client): appends "skylake" to _march_flag_list, then everything
# _broadwell() contributes. No vector units are added beyond Broadwell's.
macro(_skylake)
  list(APPEND _march_flag_list
    "skylake")
  _broadwell()
endmacro()
# Skylake with AVX-512: appends "skylake-avx512" to _march_flag_list, pulls in
# the Skylake entries, then adds the avx512f/cd/dq/bw/vl names to
# _available_vector_units_list.
macro(_skylake_avx512)
  list(APPEND _march_flag_list
    "skylake-avx512")
  _skylake()
  list(APPEND _available_vector_units_list
    "avx512f"
    "avx512cd"
    "avx512dq"
    "avx512bw"
    "avx512vl")
endmacro()
# Cannonlake: appends "cannonlake" to _march_flag_list, pulls in the
# Skylake-AVX512 entries, then adds "avx512ifma" and "avx512vbmi" to
# _available_vector_units_list.
macro(_cannonlake)
  list(APPEND _march_flag_list
    "cannonlake")
  _skylake_avx512()
  list(APPEND _available_vector_units_list
    "avx512ifma"
    "avx512vbmi")
endmacro()
# Knights Landing: appends "knl" to _march_flag_list, pulls in the Broadwell
# entries (not the Skylake ones), then adds the avx512f/pf/er/cd names to
# _available_vector_units_list.
macro(_knightslanding)
  list(APPEND _march_flag_list
    "knl")
  _broadwell()
  list(APPEND _available_vector_units_list
    "avx512f"
    "avx512pf"
    "avx512er"
    "avx512cd")
endmacro()
if(TARGET_ARCHITECTURE STREQUAL "core")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
@ -188,38 +274,26 @@ macro(OptimizeForArchitecture)
else()
message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
endif()
elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
list(APPEND _march_flag_list "nehalem")
list(APPEND _march_flag_list "corei7")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
list(APPEND _march_flag_list "westmere")
list(APPEND _march_flag_list "corei7")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
elseif(TARGET_ARCHITECTURE STREQUAL "haswell")
list(APPEND _march_flag_list "core-avx2")
list(APPEND _march_flag_list "core-avx-i")
list(APPEND _march_flag_list "corei7-avx")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "avx2" "rdrnd" "f16c" "fma")
elseif(TARGET_ARCHITECTURE STREQUAL "knl")
_knightslanding()
elseif(TARGET_ARCHITECTURE STREQUAL "cannonlake")
_cannonlake()
elseif(TARGET_ARCHITECTURE STREQUAL "skylake-xeon" OR TARGET_ARCHITECTURE STREQUAL "skylake-avx512")
_skylake_avx512()
elseif(TARGET_ARCHITECTURE STREQUAL "skylake")
_skylake()
elseif(TARGET_ARCHITECTURE STREQUAL "broadwell")
list(APPEND _march_flag_list "core-avx2")
list(APPEND _march_flag_list "core-avx-i")
list(APPEND _march_flag_list "corei7-avx")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "avx2" "rdrnd" "f16c" "fma")
_broadwell()
elseif(TARGET_ARCHITECTURE STREQUAL "haswell")
_haswell()
elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
list(APPEND _march_flag_list "core-avx-i")
list(APPEND _march_flag_list "corei7-avx")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
_ivybridge()
elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
list(APPEND _march_flag_list "sandybridge")
list(APPEND _march_flag_list "corei7-avx")
list(APPEND _march_flag_list "core2")
list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
_sandybridge()
elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
_westmere()
elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
_nehalem()
elseif(TARGET_ARCHITECTURE STREQUAL "atom")
list(APPEND _march_flag_list "atom")
list(APPEND _march_flag_list "core2")
@ -280,113 +354,84 @@ macro(OptimizeForArchitecture)
if(NOT TARGET_ARCHITECTURE STREQUAL "none")
set(_disable_vector_unit_list)
set(_enable_vector_unit_list)
_my_find(_available_vector_units_list "sse2" SSE2_FOUND)
_my_find(_available_vector_units_list "sse3" SSE3_FOUND)
_my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
_my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
_my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
_my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
UserWarning("AVX disabled per default because of old/broken compiler")
set(AVX_FOUND false)
set(XOP_FOUND false)
set(FMA4_FOUND false)
set(AVX2_FOUND false)
UserWarning("AVX disabled per default because of old/broken toolchain")
set(_avx_broken true)
set(_avx2_broken true)
set(_fma4_broken true)
set(_xop_broken true)
else()
_my_find(_available_vector_units_list "avx" AVX_FOUND)
set(_avx_broken false)
if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN)
UserWarning("FMA4 disabled per default because of old/broken compiler")
set(FMA4_FOUND false)
UserWarning("FMA4 disabled per default because of old/broken toolchain")
set(_fma4_broken true)
else()
_my_find(_available_vector_units_list "fma4" FMA4_FOUND)
set(_fma4_broken false)
endif()
if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
UserWarning("XOP disabled per default because of old/broken compiler")
set(XOP_FOUND false)
UserWarning("XOP disabled per default because of old/broken toolchain")
set(_xop_broken true)
else()
_my_find(_available_vector_units_list "xop" XOP_FOUND)
set(_xop_broken false)
endif()
if(DEFINED Vc_AVX2_INTRINSICS_BROKEN AND Vc_AVX2_INTRINSICS_BROKEN)
UserWarning("AVX2 disabled per default because of old/broken compiler")
set(AVX2_FOUND false)
UserWarning("AVX2 disabled per default because of old/broken toolchain")
set(_avx2_broken true)
else()
_my_find(_available_vector_units_list "avx2" AVX2_FOUND)
set(_avx2_broken false)
endif()
endif()
set(USE_SSE2 ${SSE2_FOUND} CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
set(USE_SSE3 ${SSE3_FOUND} CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
set(USE_SSSE3 ${SSSE3_FOUND} CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
set(USE_SSE4a ${SSE4a_FOUND} CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
set(USE_AVX ${AVX_FOUND} CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
set(USE_AVX2 ${AVX2_FOUND} CACHE BOOL "Use AVX2. This will double all of the vector sizes relative to SSE." ${_force})
set(USE_XOP ${XOP_FOUND} CACHE BOOL "Use XOP." ${_force})
set(USE_FMA4 ${FMA4_FOUND} CACHE BOOL "Use FMA4." ${_force})
mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_AVX2 USE_XOP USE_FMA4)
if(USE_SSE2)
list(APPEND _enable_vector_unit_list "sse2")
else(USE_SSE2)
list(APPEND _disable_vector_unit_list "sse2")
endif(USE_SSE2)
if(USE_SSE3)
list(APPEND _enable_vector_unit_list "sse3")
else(USE_SSE3)
list(APPEND _disable_vector_unit_list "sse3")
endif(USE_SSE3)
if(USE_SSSE3)
list(APPEND _enable_vector_unit_list "ssse3")
else(USE_SSSE3)
list(APPEND _disable_vector_unit_list "ssse3")
endif(USE_SSSE3)
if(USE_SSE4_1)
list(APPEND _enable_vector_unit_list "sse4.1")
else(USE_SSE4_1)
list(APPEND _disable_vector_unit_list "sse4.1")
endif(USE_SSE4_1)
if(USE_SSE4_2)
list(APPEND _enable_vector_unit_list "sse4.2")
else(USE_SSE4_2)
list(APPEND _disable_vector_unit_list "sse4.2")
endif(USE_SSE4_2)
if(USE_SSE4a)
list(APPEND _enable_vector_unit_list "sse4a")
else(USE_SSE4a)
list(APPEND _disable_vector_unit_list "sse4a")
endif(USE_SSE4a)
if(USE_AVX)
list(APPEND _enable_vector_unit_list "avx")
# we want SSE intrinsics to result in instructions using the VEX prefix.
# Otherwise integer ops (which require the older SSE intrinsics) would
# always have a large penalty.
list(APPEND _enable_vector_unit_list "sse2avx")
else(USE_AVX)
list(APPEND _disable_vector_unit_list "avx")
endif(USE_AVX)
if(USE_XOP)
list(APPEND _enable_vector_unit_list "xop")
else()
list(APPEND _disable_vector_unit_list "xop")
endif()
if(USE_FMA4)
list(APPEND _enable_vector_unit_list "fma4")
else()
list(APPEND _disable_vector_unit_list "fma4")
endif()
if(USE_AVX2)
list(APPEND _enable_vector_unit_list "avx2")
else()
list(APPEND _disable_vector_unit_list "avx2")
endif()
# _enable_or_disable(<name> <flag> <documentation> <broken>)
#
# Defines the advanced cache option USE_<name> and records <flag> in either
# _enable_vector_unit_list or _disable_vector_unit_list, depending on the
# resulting value of that option.
#
#   <name>          suffix of the cache variable (USE_<name>)
#   <flag>          vector-unit name looked up in _available_vector_units_list
#                   and appended to the enable/disable list
#   <documentation> docstring stored with the cache entry
#   <broken>        a boolean constant or the NAME of a variable; when it
#                   evaluates to true the default for USE_<name> is false
#                   without consulting _available_vector_units_list
#
# ${_force} is expanded unquoted after the docstring; it is set elsewhere in
# the enclosing macro.
macro(_enable_or_disable _name _flag _documentation _broken)
  # Macro parameters are textual substitutions, not variables. A bare
  # if(_broken) would test a variable literally named "_broken" and ignore the
  # argument. Expanding it yields either a constant ("false") or a variable
  # name (e.g. "_avx_broken"), which if() then evaluates.
  if(${_broken})
    set(_found false)
  else()
    _my_find(_available_vector_units_list "${_flag}" _found)
  endif()
  # The parameter is _documentation; "${documentation}" was an undefined
  # variable and produced an empty docstring for every option.
  set(USE_${_name} ${_found} CACHE BOOL "${_documentation}" ${_force})
  mark_as_advanced(USE_${_name})
  if(USE_${_name})
    list(APPEND _enable_vector_unit_list "${_flag}")
  else()
    list(APPEND _disable_vector_unit_list "${_flag}")
  endif()
endmacro()
# One USE_<NAME> option per vector unit. The last argument is either the
# constant "false" or the name of the *_broken variable that forces the
# option's default to false.
_enable_or_disable(SSE2 "sse2" "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." false)
_enable_or_disable(SSE3 "sse3" "Use SSE3. If SSE3 instructions are not enabled they will be emulated." false)
_enable_or_disable(SSSE3 "ssse3" "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." false)
_enable_or_disable(SSE4_1 "sse4.1" "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." false)
_enable_or_disable(SSE4_2 "sse4.2" "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." false)
_enable_or_disable(SSE4a "sse4a" "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." false)
_enable_or_disable(AVX "avx" "Use AVX. This will double all floating-point vector sizes relative to SSE." _avx_broken)
_enable_or_disable(FMA "fma" "Use FMA." _avx_broken)
_enable_or_disable(BMI2 "bmi2" "Use BMI2." _avx_broken)
_enable_or_disable(AVX2 "avx2" "Use AVX2. This will double all of the vector sizes relative to SSE." _avx2_broken)
_enable_or_disable(XOP "xop" "Use XOP." _xop_broken)
_enable_or_disable(FMA4 "fma4" "Use FMA4." _fma4_broken)
_enable_or_disable(AVX512F "avx512f" "Use AVX512F. This will double all floating-point vector sizes relative to AVX2." false)
_enable_or_disable(AVX512VL "avx512vl" "Use AVX512VL. This enables 128- and 256-bit vector length instructions with EVEX coding (improved write-masking & more vector registers)." _avx2_broken)
_enable_or_disable(AVX512PF "avx512pf" "Use AVX512PF. This enables prefetch instructions for gathers and scatters." false)
_enable_or_disable(AVX512ER "avx512er" "Use AVX512ER. This enables exponential and reciprocal instructions." false)
_enable_or_disable(AVX512CD "avx512cd" "Use AVX512CD." false)
_enable_or_disable(AVX512DQ "avx512dq" "Use AVX512DQ." false)
_enable_or_disable(AVX512BW "avx512bw" "Use AVX512BW." false)
_enable_or_disable(AVX512IFMA "avx512ifma" "Use AVX512IFMA." false)
_enable_or_disable(AVX512VBMI "avx512vbmi" "Use AVX512VBMI." false)
if(MSVC)
# MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
# MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
_my_find(_enable_vector_unit_list "avx" _avx)
set(_avx_flag FALSE)
if(_avx)
AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
_my_find(_enable_vector_unit_list "avx2" _found)
if(_found)
AddCompilerFlag("/arch:AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found)
endif()
if(NOT _avx_flag)
if(NOT _found)
_my_find(_enable_vector_unit_list "avx" _found)
if(_found)
AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found)
endif()
endif()
if(NOT _found)
_my_find(_enable_vector_unit_list "sse2" _found)
if(_found)
AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
@ -404,7 +449,7 @@ macro(OptimizeForArchitecture)
else(_found)
_my_find(_available_vector_units_list "f16c" _found)
if(_found)
AddCompilerFlag("-march=native" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
AddCompilerFlag("-xCORE-AVX-I" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
else(_found)
_my_find(_available_vector_units_list "avx" _found)
if(_found)
@ -454,7 +499,7 @@ macro(OptimizeForArchitecture)
endif(_good)
endforeach(_flag)
foreach(_flag ${_enable_vector_unit_list})
AddCompilerFlag("-m${_flag}" CXX_RESULT _result CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
AddCompilerFlag("-m${_flag}" CXX_RESULT _result)
if(_result)
set(_header FALSE)
if(_flag STREQUAL "sse3")
@ -479,7 +524,7 @@ macro(OptimizeForArchitecture)
set(_resultVar "HAVE_${_header}")
string(REPLACE "." "_" _resultVar "${_resultVar}")
if(_header)
CHECK_INCLUDE_FILE("${_header}" ${_resultVar} "-m${_flag}")
CHECK_INCLUDE_FILE_CXX("${_header}" ${_resultVar} "-m${_flag}")
if(NOT ${_resultVar})
set(_useVar "USE_${_flag}")
string(TOUPPER "${_useVar}" _useVar)
@ -490,7 +535,7 @@ macro(OptimizeForArchitecture)
endif()
endif()
if(NOT _header OR ${_resultVar})
set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
list(APPEND Vc_ARCHITECTURE_FLAGS "-m${_flag}")
endif()
endif()
endforeach(_flag)