[libc] Add float type and flag for nearest_integer to enable SSE4.2.

Add float type and flag for nearest integer to automatically test with and without SSE4.2 flag. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D129916
2022-07-15 22:59:36 -04:00 · 2022-07-15 22:59:36 -04:00 · ed261e7106
parent 06dbcf7b2b
commit ed261e7106
6 changed files with 49 additions and 3 deletions
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@ -131,8 +131,14 @@ endfunction(get_fq_dep_list_without_flag)

 # Special flags
 set(FMA_OPT_FLAG "FMA_OPT")
+set(ROUND_OPT_FLAG "ROUND_OPT")

 # Skip FMA_OPT flag for targets that don't support fma.
 if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")))
  set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
 endif()
+
+# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
+# SKIP_FLAG_EXPANSION_ROUND_OPT is set unless the target architecture is x86
+# and LIBC_CPU_FEATURES matches "SSE4_2".
+if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
+  set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
+endif()
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@ -9,6 +9,14 @@ function(_get_common_compile_options output_var flags)
    set(ADD_FMA_FLAG TRUE)
  endif()

+  # Look for ROUND_OPT among the requested flags, falling back to its
+  # "__ONLY" variant; list(FIND) stores -1 in `round` when the entry is absent.
+  list(FIND flags ${ROUND_OPT_FLAG} round)
+  if(${round} LESS 0)
+    list(FIND flags "${ROUND_OPT_FLAG}__ONLY" round)
+  endif()
+  # ADD_SSE4_2_FLAG is set only when one of the two flag spellings was found
+  # and LIBC_CPU_FEATURES matches "SSE4_2".
+  if((${round} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE4_2"))
+    set(ADD_SSE4_2_FLAG TRUE)
+  endif()
+
  set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
  if(NOT ${LIBC_TARGET_OS} STREQUAL "windows")
    set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin)
@ -21,6 +29,9 @@ function(_get_common_compile_options output_var flags)
    if(ADD_FMA_FLAG)
      list(APPEND compile_options "-mfma")
    endif()
+    if(ADD_SSE4_2_FLAG)
+      list(APPEND compile_options "-msse4.2")
+    endif()
  elseif(MSVC)
    list(APPEND compile_options "/EHs-c-")
    list(APPEND compile_options "/GR-")
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@ -75,6 +75,8 @@ add_header_library(
    nearest_integer.h
  DEPENDS
    libc.src.__support.common
+  FLAGS
+    ROUND_OPT
 )

 add_subdirectory(generic)
--- a/libc/src/__support/FPUtil/aarch64/nearest_integer.h
+++ b/libc/src/__support/FPUtil/aarch64/nearest_integer.h
@ -18,6 +18,12 @@
 namespace __llvm_libc {
 namespace fputil {

+// Single-precision overload: rounds x to an integral value with one FRINTN
+// instruction, using the %s (32-bit scalar) register form of the operands.
+static inline float nearest_integer(float x) {
+  float result;
+  // The "w" constraint places both operands in floating-point/SIMD registers.
+  __asm__ __volatile__("frintn %s0, %s1\n\t" : "=w"(result) : "w"(x));
+  return result;
+}
+
 static inline double nearest_integer(double x) {
  double result;
  __asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x));
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@ -21,13 +21,27 @@
 namespace __llvm_libc {
 namespace fputil {

-// This is a fast implementation for rounding to a nearest integer that, in case
-// of a tie, might pick a random one among 2 closest integers when the rounding
-// mode is not FE_TONEAREST.
+// This is a fast implementation for rounding to a nearest integer.
 //
 // Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their
 // corresponding rounding instruction instead.  And in those cases, the results
 // are rounded to the nearest integer, tie-to-even.
+static inline float nearest_integer(float x) {
+  // Only |x| < 2^24 is processed here; every other input (including NaN, for
+  // which both comparisons are false) is returned unchanged at the end.
+  if (x < 0x1p24f && x > -0x1p24f) {
+    // Add and then subtract 2^23, signed like x, so that the floating-point
+    // addition itself performs the rounding to an integer.
+    float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f;
+    // Signed distance from the rounded candidate back to the input.
+    float diff = x - r;
+    // The expression above is correct for the default rounding mode, round-to-
+    // nearest, tie-to-even.  For other rounding modes, it might be off by 1,
+    // which is corrected below.
+    if (unlikely(diff > 0.5f))
+      return r + 1.0f;
+    if (unlikely(diff < -0.5f))
+      return r - 1.0f;
+    return r;
+  }
+  return x;
+}
+
 static inline double nearest_integer(double x) {
  if (x < 0x1p53 && x > -0x1p53) {
    double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52;
--- a/libc/src/__support/FPUtil/x86_64/nearest_integer.h
+++ b/libc/src/__support/FPUtil/x86_64/nearest_integer.h
@ -24,6 +24,13 @@
 namespace __llvm_libc {
 namespace fputil {

+// Single-precision overload: rounds x to the nearest integer, tie-to-even,
+// through _mm_round_ss; _MM_FROUND_NO_EXC suppresses floating-point
+// exceptions raised by the rounding.
+static inline float nearest_integer(float x) {
+  __m128 xmm = _mm_set_ss(x); // NOLINT
+  // _MM_FROUND_TO_NEAREST_INT is the rounding selector defined for
+  // _mm_round_ss; _MM_ROUND_NEAREST belongs to the MXCSR rounding-mode API
+  // and only happens to share the same numeric value.
+  __m128 ymm = _mm_round_ss(
+      xmm, xmm, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); // NOLINT
+  // Read lane 0 with the intrinsic rather than the GNU vector subscript.
+  return _mm_cvtss_f32(ymm); // NOLINT
+}
+
 static inline double nearest_integer(double x) {
  __m128d xmm = _mm_set_sd(x); // NOLINT
  __m128d ymm =