[libc] Add float type and flag for nearest_integer to enable SSE4.2.

Add a float overload of nearest_integer and a ROUND_OPT flag so that it is
automatically built and tested both with and without the SSE4.2 flag.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D129916
This commit is contained in:
Tue Ly 2022-07-15 22:59:36 -04:00
parent 06dbcf7b2b
commit ed261e7106
6 changed files with 49 additions and 3 deletions

View File

@ -131,8 +131,14 @@ endfunction(get_fq_dep_list_without_flag)
# Special flag names.
set(FMA_OPT_FLAG "FMA_OPT")
set(ROUND_OPT_FLAG "ROUND_OPT")

# FMA_OPT expansion is skipped unless the target is x86 and the detected CPU
# features mention FMA.
if((NOT LIBC_TARGET_ARCHITECTURE_IS_X86) OR
   (NOT (LIBC_CPU_FEATURES MATCHES "FMA")))
  set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
endif()

# ROUND_OPT expansion is skipped unless the target is x86 and the detected CPU
# features mention SSE4_2.
if((NOT LIBC_TARGET_ARCHITECTURE_IS_X86) OR
   (NOT (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
  set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()

View File

@ -9,6 +9,14 @@ function(_get_common_compile_options output_var flags)
set(ADD_FMA_FLAG TRUE)
endif()
list(FIND flags ${ROUND_OPT_FLAG} round)
if(${round} LESS 0)
list(FIND flags "${ROUND_OPT_FLAG}__ONLY" round)
endif()
if((${round} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE4_2"))
set(ADD_SSE4_2_FLAG TRUE)
endif()
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
if(NOT ${LIBC_TARGET_OS} STREQUAL "windows")
set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin)
@ -21,6 +29,9 @@ function(_get_common_compile_options output_var flags)
if(ADD_FMA_FLAG)
list(APPEND compile_options "-mfma")
endif()
if(ADD_SSE4_2_FLAG)
list(APPEND compile_options "-msse4.2")
endif()
elseif(MSVC)
list(APPEND compile_options "/EHs-c-")
list(APPEND compile_options "/GR-")

View File

@ -75,6 +75,8 @@ add_header_library(
nearest_integer.h
DEPENDS
libc.src.__support.common
FLAGS
ROUND_OPT
)
add_subdirectory(generic)

View File

@ -18,6 +18,12 @@
namespace __llvm_libc {
namespace fputil {
// Rounds the float `x` to an integral value using a single AArch64 `frintn`
// instruction (round to nearest, ties to even) and returns the result as a
// float.
static inline float nearest_integer(float x) {
float result;
// `%s0` / `%s1` name the output and input operands as single-precision scalar
// registers; the "w" constraint asks for floating-point/SIMD registers.
__asm__ __volatile__("frintn %s0, %s1\n\t" : "=w"(result) : "w"(x));
return result;
}
static inline double nearest_integer(double x) {
double result;
__asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x));

View File

@ -21,13 +21,27 @@
namespace __llvm_libc {
namespace fputil {
// This is a fast implementation for rounding to a nearest integer that, in case
// of a tie, might pick either of the 2 closest integers when the rounding mode
// is not FE_TONEAREST.
//
// Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their
// corresponding rounding instructions instead. In those cases, the results
// are rounded to the nearest integer, tie-to-even.
static inline float nearest_integer(float x) {
if (x < 0x1p24f && x > -0x1p24f) {
float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f;
float diff = x - r;
// The expression above is correct for the default rounding mode, round-to-
// nearest, tie-to-even. For other rounding modes, it might be off by 1,
// which is corrected below.
if (unlikely(diff > 0.5f))
return r + 1.0f;
if (unlikely(diff < -0.5f))
return r - 1.0f;
return r;
}
return x;
}
static inline double nearest_integer(double x) {
if (x < 0x1p53 && x > -0x1p53) {
double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52;

View File

@ -24,6 +24,13 @@
namespace __llvm_libc {
namespace fputil {
static inline float nearest_integer(float x) {
__m128 xmm = _mm_set_ss(x); // NOLINT
__m128 ymm =
_mm_round_ss(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT
return ymm[0];
}
static inline double nearest_integer(double x) {
__m128d xmm = _mm_set_sd(x); // NOLINT
__m128d ymm =