[libc] Add float type and flag for nearest_integer to enable SSE4.2.
Add float type and flag for nearest integer to automatically test with and without SSE4.2 flag. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D129916
This commit is contained in:
parent
06dbcf7b2b
commit
ed261e7106
|
@ -131,8 +131,14 @@ endfunction(get_fq_dep_list_without_flag)
|
|||
|
||||
# Special flags
#
# Names of the optional flags that a target may list under FLAGS.
set(FMA_OPT_FLAG "FMA_OPT")
set(ROUND_OPT_FLAG "ROUND_OPT")

# FMA_OPT is only expanded when the target is x86 and the detected CPU
# features include FMA; in every other case its expansion is skipped.
if((NOT LIBC_TARGET_ARCHITECTURE_IS_X86) OR (NOT (LIBC_CPU_FEATURES MATCHES "FMA")))
  set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
endif()

# ROUND_OPT is only expanded when the target is x86 and the detected CPU
# features include SSE4_2; in every other case its expansion is skipped.
if((NOT LIBC_TARGET_ARCHITECTURE_IS_X86) OR (NOT (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
  set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
|
||||
|
|
|
@ -9,6 +9,14 @@ function(_get_common_compile_options output_var flags)
|
|||
set(ADD_FMA_FLAG TRUE)
|
||||
endif()
|
||||
|
||||
list(FIND flags ${ROUND_OPT_FLAG} round)
|
||||
if(${round} LESS 0)
|
||||
list(FIND flags "${ROUND_OPT_FLAG}__ONLY" round)
|
||||
endif()
|
||||
if((${round} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE4_2"))
|
||||
set(ADD_SSE4_2_FLAG TRUE)
|
||||
endif()
|
||||
|
||||
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
|
||||
if(NOT ${LIBC_TARGET_OS} STREQUAL "windows")
|
||||
set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin)
|
||||
|
@ -21,6 +29,9 @@ function(_get_common_compile_options output_var flags)
|
|||
if(ADD_FMA_FLAG)
|
||||
list(APPEND compile_options "-mfma")
|
||||
endif()
|
||||
if(ADD_SSE4_2_FLAG)
|
||||
list(APPEND compile_options "-msse4.2")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
list(APPEND compile_options "/EHs-c-")
|
||||
list(APPEND compile_options "/GR-")
|
||||
|
|
|
@ -75,6 +75,8 @@ add_header_library(
|
|||
nearest_integer.h
|
||||
DEPENDS
|
||||
libc.src.__support.common
|
||||
FLAGS
|
||||
ROUND_OPT
|
||||
)
|
||||
|
||||
add_subdirectory(generic)
|
||||
|
|
|
@ -18,6 +18,12 @@
|
|||
namespace __llvm_libc {
|
||||
namespace fputil {
|
||||
|
||||
static inline float nearest_integer(float x) {
|
||||
float result;
|
||||
__asm__ __volatile__("frintn %s0, %s1\n\t" : "=w"(result) : "w"(x));
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline double nearest_integer(double x) {
|
||||
double result;
|
||||
__asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x));
|
||||
|
|
|
@ -21,13 +21,27 @@
|
|||
namespace __llvm_libc {
|
||||
namespace fputil {
|
||||
|
||||
// This is a fast implementation for rounding to a nearest integer that, in case
|
||||
// of a tie, might pick a random one among 2 closest integers when the rounding
|
||||
// mode is not FE_TONEAREST.
|
||||
// This is a fast implementation for rounding to the nearest integer.
|
||||
//
|
||||
// Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their
|
||||
// corresponding rounding instruction instead. And in those cases, the results
|
||||
// are rounded to the nearest integer, tie-to-even.
|
||||
static inline float nearest_integer(float x) {
|
||||
if (x < 0x1p24f && x > -0x1p24f) {
|
||||
float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f;
|
||||
float diff = x - r;
|
||||
// The expression above is correct for the default rounding mode, round-to-
|
||||
// nearest, tie-to-even. For other rounding modes, it might be off by 1,
|
||||
// which is corrected below.
|
||||
if (unlikely(diff > 0.5f))
|
||||
return r + 1.0f;
|
||||
if (unlikely(diff < -0.5f))
|
||||
return r - 1.0f;
|
||||
return r;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline double nearest_integer(double x) {
|
||||
if (x < 0x1p53 && x > -0x1p53) {
|
||||
double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52;
|
||||
|
|
|
@ -24,6 +24,13 @@
|
|||
namespace __llvm_libc {
|
||||
namespace fputil {
|
||||
|
||||
static inline float nearest_integer(float x) {
|
||||
__m128 xmm = _mm_set_ss(x); // NOLINT
|
||||
__m128 ymm =
|
||||
_mm_round_ss(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT
|
||||
return ymm[0];
|
||||
}
|
||||
|
||||
static inline double nearest_integer(double x) {
|
||||
__m128d xmm = _mm_set_sd(x); // NOLINT
|
||||
__m128d ymm =
|
||||
|
|
Loading…
Reference in New Issue