[VENTUS][fix] Fix float precision issue in libclc for ventus

There are many potential precision bugs in libclc, especially in the functions under the `libclc/riscv32/lib/compiler-rt` directory.
2023-10-30 10:59:59 +08:00 · 2023-10-30 10:59:59 +08:00 · dfd2affa51
parent 91b9888732
commit dfd2affa51
5 changed files with 79 additions and 35 deletions
--- a/libclc/riscv32/lib/compiler-rt/floatdidf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatdidf.cl
@ -22,32 +22,32 @@ double __floatdidf(di_int a) {
  const unsigned N = sizeof(di_int) * 8;
  const di_int s = a >> (N - 1);
  a = (a ^ s) - s;
-  int sd = N - __builtin_clzll(a);
+  int sd = N - clz64(a);
  int e = sd - 1;
-  if (sd > 53) {
+  if (sd > DBL_MANT_DIG) {

    switch (sd) {
-    case 53 + 1:
+    case DBL_MANT_DIG + 1:
      a <<= 1;
      break;
-    case 53 + 2:
+    case DBL_MANT_DIG + 2:
      break;
    default:
-      a = ((du_int)a >> (sd - (53 + 2))) |
-          ((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0);
+      a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
    };

    a |= (a & 4) != 0;
    ++a;
    a >>= 2;

-    if (a & ((du_int)1 << 53)) {
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
      a >>= 1;
      ++e;
    }

  } else {
-    a <<= (53 - sd);
+    a <<= (DBL_MANT_DIG - sd);
  }
  double_bits fb;
  fb.u.s.high = ((su_int)s & 0x80000000) | ((su_int)(e + 1023) << 20) |
--- a/libclc/riscv32/lib/compiler-rt/floatdisf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatdisf.cl
@ -22,32 +22,32 @@ float __floatdisf(di_int a) {
  const unsigned N = sizeof(di_int) * 8;
  const di_int s = a >> (N - 1);
  a = (a ^ s) - s;
-  int sd = N - __builtin_clzll(a);
+  int sd = N - clz64(a);
  si_int e = sd - 1;
-  if (sd > 24) {
+  if (sd > FLT_MANT_DIG) {

    switch (sd) {
-    case 24 + 1:
+    case FLT_MANT_DIG + 1:
      a <<= 1;
      break;
-    case 24 + 2:
+    case FLT_MANT_DIG + 2:
      break;
    default:
-      a = ((du_int)a >> (sd - (24 + 2))) |
-          ((a & ((du_int)(-1) >> ((N + 24 + 2) - sd))) != 0);
+      a = ((du_int)a >> (sd - (FLT_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0);
    };

    a |= (a & 4) != 0;
    ++a;
    a >>= 2;

-    if (a & ((du_int)1 << 24)) {
+    if (a & ((du_int)1 << FLT_MANT_DIG)) {
      a >>= 1;
      ++e;
    }

  } else {
-    a <<= (24 - sd);
+    a <<= (FLT_MANT_DIG - sd);
  }
  float_bits fb;
  fb.u =
--- a/libclc/riscv32/lib/compiler-rt/floatundidf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatundidf.cl
@ -22,30 +22,30 @@ double __floatundidf(du_int a) {
  const unsigned N = sizeof(du_int) * 8;
  int sd = N - __builtin_clzll(a);
  int e = sd - 1;
-  if (sd > 53) {
+  if (sd > DBL_MANT_DIG) {

    switch (sd) {
-    case 53 + 1:
+    case DBL_MANT_DIG + 1:
      a <<= 1;
      break;
-    case 53 + 2:
+    case DBL_MANT_DIG + 2:
      break;
    default:
-      a = (a >> (sd - (53 + 2))) |
-          ((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0);
+      a = (a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
    };

    a |= (a & 4) != 0;
    ++a;
    a >>= 2;

-    if (a & ((du_int)1 << 53)) {
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
      a >>= 1;
      ++e;
    }

  } else {
-    a <<= (53 - sd);
+    a <<= (DBL_MANT_DIG - sd);
  }
  double_bits fb;
  fb.u.s.high = ((su_int)(e + 1023) << 20) | ((su_int)(a >> 32) & 0x000FFFFF);
--- a/libclc/riscv32/lib/compiler-rt/floatundisf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatundisf.cl
@ -17,18 +17,19 @@

 // seee eeee emmm mmmm mmmm mmmm mmmm mmmm

-typedef union {
-  int u;
-  float f;
-} float_bits;
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#include "types.h"

 float __floatundisf(long a) {
  if (a == 0)
    return 0.0F;
  const unsigned N = sizeof(long) * 8;
-  int sd = N - __builtin_clzll(a); // number of significant digits
+  int sd = N - clz64(a); // number of significant digits
  int e = sd - 1;               // 8 exponent
-  if (sd > 24) {
+  if (sd > FLT_MANT_DIG) {
    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
    //                                                12345678901234567890123456
@ -37,27 +38,27 @@ float __floatundisf(long a) {
    //  Q = bit FLT_MANT_DIG bits to the right of 1
    //  R = "or" of all bits to the right of Q
    switch (sd) {
-    case 24 + 1:
+    case FLT_MANT_DIG + 1:
      a <<= 1;
      break;
-    case 24 + 2:
+    case FLT_MANT_DIG + 2:
      break;
    default:
-      a = (a >> (sd - (24 + 2))) |
-          ((a & ((long)(-1) >> ((N + 24+ 2) - sd))) != 0);
+      a = (a >> (sd - (FLT_MANT_DIG + 2))) |
+          ((a & ((long)(-1) >> ((N + FLT_MANT_DIG+ 2) - sd))) != 0);
    };
    // finish:
    a |= (a & 4) != 0; // Or P into R
    ++a;               // round - this step may add a significant bit
    a >>= 2;           // dump Q and R
    // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits
-    if (a & ((long)1 << 24)) {
+    if (a & ((long)1 << FLT_MANT_DIG)) {
      a >>= 1;
      ++e;
    }
    // a is now rounded to FLT_MANT_DIG bits
  } else {
-    a <<= (24 - sd);
+    a <<= (FLT_MANT_DIG - sd);
    // a is now rounded to FLT_MANT_DIG bits
  }
  float_bits fb;
@ -65,3 +66,5 @@ float __floatundisf(long a) {
         ((unsigned)a & 0x007FFFFF); // mantissa
  return fb.f;
 }
+
+#endif
--- a/libclc/riscv32/lib/compiler-rt/types.h
+++ b/libclc/riscv32/lib/compiler-rt/types.h
@ -1,6 +1,8 @@
 #ifndef TYPES_H
 #define TYPES_H

+#include <float.h>
+
 typedef char char2  __attribute__((__ext_vector_type__(2)));
 typedef char char3  __attribute__((__ext_vector_type__(3)));
 typedef char char4  __attribute__((__ext_vector_type__(4)));
@ -525,4 +527,43 @@ do {                                                            \
  (d) = __u.value;                                              \
 } while (0)

+
+static int __attribute__((noinline)) clzl(unsigned long x) // Count the zero bits above the highest set bit of x, scanning 64 bit positions.
+//static int inline clzl(unsigned long x)
+{
+    for (int i = 0; i != 64; ++i) // i is the candidate number of leading zeros
+         if ((x >> (63 - i)) & 1) // test bit (63 - i), walking from bit 63 downward
+             return i; // first set bit found: exactly i zero bits precede it
+
+    return 0; // NOTE(review): x == 0 falls through to 0, the same value returned when bit 63 is set — confirm callers never pass 0
+}
+
+static int ctz64(unsigned long x) // Return the index of the lowest set bit of x (its count of trailing zeros); masks assume a 64-bit unsigned long.
+{
+  int r = 63; // start at the highest index; each mask test below lowers r by that mask's weight

+  x &= ~x + 1; // ~x + 1 is the two's-complement negation of x, so the AND keeps only the lowest set bit
+  if (x & 0x00000000FFFFFFFF) r -= 32; // the bit is in the low 32 bits
+  if (x & 0x0000FFFF0000FFFF) r -= 16; // the bit is in the low half of its 32-bit group
+  if (x & 0x00FF00FF00FF00FF) r -= 8; // the bit is in the low half of its 16-bit group
+  if (x & 0x0F0F0F0F0F0F0F0F) r -= 4; // the bit is in the low nibble of its byte
+  if (x & 0x3333333333333333) r -= 2; // the bit is in the low pair of its nibble
+  if (x & 0x5555555555555555) r -= 1; // the bit is at an even position

+  return r; // NOTE(review): for x == 0 no test fires, so 63 is returned
+}
+
+static int clz64(unsigned long x) { // Count leading zero bits of x by halving the search window; masks assume a 64-bit unsigned long.
+  int r = 0; // zero bits counted so far

+  if ((x & 0xFFFFFFFF00000000) == 0) r += 32, x <<= 32; // top 32 bits clear: count them and move the rest to the top
+  if ((x & 0xFFFF000000000000) == 0) r += 16, x <<= 16; // same step for the top 16 bits
+  if ((x & 0xFF00000000000000) == 0) r += 8,  x <<= 8; // top 8 bits
+  if ((x & 0xF000000000000000) == 0) r += 4,  x <<= 4; // top 4 bits
+  if ((x & 0xC000000000000000) == 0) r += 2,  x <<= 2; // top 2 bits
+  if ((x & 0x8000000000000000) == 0) r += 1,  x <<= 1; // top bit

+  return r; // NOTE(review): x == 0 takes every branch and returns 63 (32+16+8+4+2+1), not 64
+}
+
 #endif // TYPES_H