[libc] Automatically add -mfma flag for architectures supporting FMA.
Detect if the architecture supports FMA instructions and if the targets depend on fma. Reviewed By: gchatelet Differential Revision: https://reviews.llvm.org/D123615
This commit is contained in:
parent
78b16ccf2b
commit
614567a7bf
|
@ -6,7 +6,7 @@
|
|||
set(ALL_CPU_FEATURES "")
|
||||
|
||||
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
|
||||
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F)
|
||||
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F FMA)
|
||||
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
|
||||
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
|
||||
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
|
||||
|
@ -66,6 +66,7 @@ if(CMAKE_CROSSCOMPILING)
|
|||
if(NOT "${cpu_features}" STREQUAL "${LIBC_CPU_FEATURES}")
|
||||
message(FATAL_ERROR "Unsupported CPU features: ${cpu_features}")
|
||||
endif()
|
||||
message(STATUS "Set CPU features: ${cpu_features}")
|
||||
set(LIBC_CPU_FEATURES "${cpu_features}")
|
||||
else()
|
||||
# Populates the LIBC_CPU_FEATURES list from host.
|
||||
|
@ -76,6 +77,7 @@ else()
|
|||
COMPILE_OUTPUT_VARIABLE compile_output
|
||||
RUN_OUTPUT_VARIABLE run_output)
|
||||
if("${run_result}" EQUAL 0)
|
||||
message(STATUS "Set CPU features: ${run_output}")
|
||||
set(LIBC_CPU_FEATURES "${run_output}")
|
||||
elseif(NOT ${compile_result})
|
||||
message(FATAL_ERROR "Failed to compile: ${compile_output}")
|
||||
|
|
|
@ -131,3 +131,8 @@ endfunction(get_fq_dep_list_without_flag)
|
|||
|
||||
# Special flags
|
||||
set(FMA_OPT_FLAG "FMA_OPT")
|
||||
|
||||
# Skip FMA_OPT flag for targets that don't support fma.
|
||||
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")))
|
||||
set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
|
||||
endif()
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
set(OBJECT_LIBRARY_TARGET_TYPE "OBJECT_LIBRARY")
|
||||
|
||||
function(_get_common_compile_options output_var)
|
||||
function(_get_common_compile_options output_var flags)
|
||||
list(FIND flags ${FMA_OPT_FLAG} fma)
|
||||
if(${fma} LESS 0)
|
||||
list(FIND flags "${FMA_OPT_FLAG}__ONLY" fma)
|
||||
endif()
|
||||
if((${fma} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "FMA"))
|
||||
set(ADD_FMA_FLAG TRUE)
|
||||
endif()
|
||||
|
||||
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
|
||||
if(NOT ${LIBC_TARGET_OS} STREQUAL "windows")
|
||||
set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin)
|
||||
|
@ -10,9 +18,15 @@ function(_get_common_compile_options output_var)
|
|||
list(APPEND compile_options "-fno-unwind-tables")
|
||||
list(APPEND compile_options "-fno-asynchronous-unwind-tables")
|
||||
list(APPEND compile_options "-fno-rtti")
|
||||
if(ADD_FMA_FLAG)
|
||||
list(APPEND compile_options "-mfma")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
list(APPEND compile_options "/EHs-c-")
|
||||
list(APPEND compile_options "/GR-")
|
||||
if(ADD_FMA_FLAG)
|
||||
list(APPEND compile_options "/arch:AVX2")
|
||||
endif()
|
||||
endif()
|
||||
set(${output_var} ${compile_options} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
@ -54,7 +68,11 @@ function(create_object_library fq_target_name)
|
|||
${LIBC_SOURCE_DIR}
|
||||
${LIBC_BUILD_DIR}
|
||||
)
|
||||
_get_common_compile_options(compile_options ${ADD_OBJECT_COMPILE_OPTIONS})
|
||||
_get_common_compile_options(
|
||||
compile_options
|
||||
"${ADD_OBJECT_FLAGS}"
|
||||
${ADD_OBJECT_COMPILE_OPTIONS}
|
||||
)
|
||||
target_compile_options(${fq_target_name} PRIVATE ${compile_options})
|
||||
|
||||
get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS})
|
||||
|
@ -276,7 +294,11 @@ function(create_entrypoint_object fq_target_name)
|
|||
set(ADD_ENTRYPOINT_OBJ_CXX_STANDARD ${CMAKE_CXX_STANDARD})
|
||||
endif()
|
||||
|
||||
_get_common_compile_options(common_compile_options ${ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS})
|
||||
_get_common_compile_options(
|
||||
common_compile_options
|
||||
"${ADD_ENTRYPOINT_OBJ_FLAGS}"
|
||||
${ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS}
|
||||
)
|
||||
set(internal_target_name ${fq_target_name}.__internal__)
|
||||
set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR})
|
||||
get_fq_deps_list(fq_deps_list ${ADD_ENTRYPOINT_OBJ_DEPENDS})
|
||||
|
|
|
@ -48,6 +48,8 @@ add_header_library(
|
|||
DEPENDS
|
||||
.fputil
|
||||
libc.src.__support.FPUtil.generic.fma
|
||||
FLAGS
|
||||
FMA_OPT
|
||||
)
|
||||
|
||||
add_header_library(
|
||||
|
|
|
@ -24,7 +24,7 @@ namespace fputil {
|
|||
template <typename T> static inline T polyeval(T x, T a0) { return a0; }
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
INLINE_FMA static inline T polyeval(T x, T a0, Ts... a) {
|
||||
static inline T polyeval(T x, T a0, Ts... a) {
|
||||
return multiply_add(x, polyeval(x, a...), a0);
|
||||
}
|
||||
|
||||
|
|
|
@ -26,8 +26,8 @@ namespace __llvm_libc {
|
|||
namespace fputil {
|
||||
|
||||
template <typename T>
|
||||
INLINE_FMA static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T>
|
||||
fma(T x, T y, T z) {
|
||||
static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
|
||||
T z) {
|
||||
float result;
|
||||
__m128 xmm = _mm_load_ss(&x); // NOLINT
|
||||
__m128 ymm = _mm_load_ss(&y); // NOLINT
|
||||
|
@ -38,8 +38,8 @@ fma(T x, T y, T z) {
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
INLINE_FMA static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T>
|
||||
fma(T x, T y, T z) {
|
||||
static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T> fma(T x, T y,
|
||||
T z) {
|
||||
double result;
|
||||
__m128d xmm = _mm_load_sd(&x); // NOLINT
|
||||
__m128d ymm = _mm_load_sd(&y); // NOLINT
|
||||
|
|
|
@ -23,8 +23,7 @@ namespace fputil {
|
|||
// Cubic polynomials:
|
||||
// polyeval(x, a0, a1, a2, a3) = a3*x^3 + a2*x^2 + a1*x + a0
|
||||
template <>
|
||||
INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
|
||||
float a3) {
|
||||
inline float polyeval(float x, float a0, float a1, float a2, float a3) {
|
||||
__m128 xmm = _mm_set1_ps(x); // NOLINT
|
||||
__m128 a13 = _mm_set_ps(0.0f, x, a3, a1); // NOLINT
|
||||
__m128 a02 = _mm_set_ps(0.0f, 0.0f, a2, a0); // NOLINT
|
||||
|
@ -35,8 +34,7 @@ INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
|
|||
}
|
||||
|
||||
template <>
|
||||
INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
|
||||
double a3) {
|
||||
inline double polyeval(double x, double a0, double a1, double a2, double a3) {
|
||||
__m256d xmm = _mm256_set1_pd(x); // NOLINT
|
||||
__m256d a13 = _mm256_set_pd(0.0, x, a3, a1); // NOLINT
|
||||
__m256d a02 = _mm256_set_pd(0.0, 0.0, a2, a0); // NOLINT
|
||||
|
@ -50,8 +48,8 @@ INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
|
|||
// polyeval(x, a0, a1, a2, a3, a4, a5) = a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 +
|
||||
// + a1*x + a0
|
||||
template <>
|
||||
INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
|
||||
float a3, float a4, float a5) {
|
||||
inline float polyeval(float x, float a0, float a1, float a2, float a3, float a4,
|
||||
float a5) {
|
||||
__m128 xmm = _mm_set1_ps(x); // NOLINT
|
||||
__m128 a25 = _mm_set_ps(0.0f, x, a5, a2); // NOLINT
|
||||
__m128 a14 = _mm_set_ps(0.0f, 0.0f, a4, a1); // NOLINT
|
||||
|
@ -65,8 +63,8 @@ INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
|
|||
}
|
||||
|
||||
template <>
|
||||
INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
|
||||
double a3, double a4, double a5) {
|
||||
inline double polyeval(double x, double a0, double a1, double a2, double a3,
|
||||
double a4, double a5) {
|
||||
__m256d xmm = _mm256_set1_pd(x); // NOLINT
|
||||
__m256d a25 = _mm256_set_pd(0.0, x, a5, a2); // NOLINT
|
||||
__m256d a14 = _mm256_set_pd(0.0, 0.0, a4, a1); // NOLINT
|
||||
|
|
|
@ -45,10 +45,4 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined(LLVM_LIBC_ARCH_X86_64) && defined(LIBC_TARGET_HAS_FMA))
|
||||
#define INLINE_FMA __attribute__((target("fma")))
|
||||
#else
|
||||
#define INLINE_FMA
|
||||
#endif // LLVM_LIBC_ARCH_X86_64
|
||||
|
||||
#endif // LLVM_LIBC_SUPPORT_ARCHITECTURES_H
|
||||
|
|
|
@ -51,7 +51,6 @@ add_entrypoint_object(
|
|||
libc.src.__support.FPUtil.fma
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -65,7 +64,8 @@ add_entrypoint_object(
|
|||
libc.src.__support.FPUtil.fma
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
FLAGS
|
||||
FMA_OPT__ONLY
|
||||
)
|
||||
|
||||
add_math_entrypoint_object(ceil)
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
|
||||
namespace __llvm_libc {
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
|
||||
return fputil::fma(x, y, z);
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
|
||||
namespace __llvm_libc {
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
|
||||
return fputil::fma(x, y, z);
|
||||
}
|
||||
|
|
|
@ -482,7 +482,6 @@ add_entrypoint_object(
|
|||
libc.include.math
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -497,7 +496,6 @@ add_entrypoint_object(
|
|||
libc.include.math
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -514,7 +512,6 @@ add_entrypoint_object(
|
|||
libc.include.math
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -682,7 +679,6 @@ add_entrypoint_object(
|
|||
libc.src.__support.FPUtil.polyeval
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -698,7 +694,6 @@ add_entrypoint_object(
|
|||
libc.src.__support.FPUtil.polyeval
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -713,7 +708,6 @@ add_entrypoint_object(
|
|||
libc.src.__support.FPUtil.polyeval
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
@ -729,7 +723,6 @@ add_entrypoint_object(
|
|||
libc.src.__support.FPUtil.polyeval
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
-mfma
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
|
|
@ -47,7 +47,6 @@ static constexpr double EXP_M[64] = {
|
|||
0x1.fa7c1819e90d8p0,
|
||||
};
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, exp2f, (float x)) {
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
FPBits xbits(x);
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
namespace __llvm_libc {
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, expf, (float x)) {
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
FPBits xbits(x);
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
namespace __llvm_libc {
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
FPBits xbits(x);
|
||||
|
|
|
@ -101,7 +101,6 @@ static constexpr double LOG10_F[128] = {
|
|||
0x1.2b7b9e258e422p-2, 0x1.2d404b073e27ep-2, 0x1.2f032cf56a5bep-2,
|
||||
0x1.30c4478f0835fp-2, 0x1.32839e681fc62p-2};
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
|
||||
constexpr double LOG10_2 = 0x1.34413509f79ffp-2;
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ namespace __llvm_libc {
|
|||
namespace internal {
|
||||
|
||||
// We don't need to treat denormal
|
||||
INLINE_FMA static inline float log(double x) {
|
||||
static inline float log(double x) {
|
||||
constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
|
||||
|
||||
using FPBits = typename fputil::FPBits<double>;
|
||||
|
@ -77,7 +77,6 @@ INLINE_FMA static inline float log(double x) {
|
|||
|
||||
} // namespace internal
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, log1pf, (float x)) {
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
FPBits xbits(x);
|
||||
|
|
|
@ -98,7 +98,6 @@ static constexpr double LOG2_F[128] = {
|
|||
0x1.f16e281db7630p-1, 0x1.f45e08bcf0655p-1, 0x1.f74aef0efafaep-1,
|
||||
0x1.fa34e1177c233p-1, 0x1.fd1be4c7f2af9p-1};
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
FPBits xbits(x);
|
||||
|
|
|
@ -49,7 +49,6 @@
|
|||
|
||||
namespace __llvm_libc {
|
||||
|
||||
INLINE_FMA
|
||||
LLVM_LIBC_FUNCTION(float, logf, (float x)) {
|
||||
constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
|
|
|
@ -1189,6 +1189,9 @@ add_fp_unittest(
|
|||
libc.src.__support.FPUtil.fputil
|
||||
)
|
||||
|
||||
# Without FMA instructions, the current expm1f implementation is not correctly
|
||||
# rounded for all float inputs (1 extra exceptional value). This will be fixed
|
||||
# in the followup patch: https://reviews.llvm.org/D123440
|
||||
add_fp_unittest(
|
||||
expm1f_test
|
||||
NEED_MPFR
|
||||
|
@ -1201,6 +1204,8 @@ add_fp_unittest(
|
|||
libc.include.math
|
||||
libc.src.math.expm1f
|
||||
libc.src.__support.FPUtil.fputil
|
||||
FLAGS
|
||||
FMA_OPT__ONLY
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
|
|
Loading…
Reference in New Issue