From dfd2affa513164da8d6128be014ddc92aa79c31e Mon Sep 17 00:00:00 2001
From: zhoujingya <jing.zhou@terapines.com>
Date: Mon, 30 Oct 2023 10:59:59 +0800
Subject: [PATCH] [VENTUS][fix] Fix float precision issue in libclc for ventus

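There are many potential precision bugs in libclc, especially in the functions
under the `libclc/riscv32/lib/compiler-rt` directory. This patch replaces the
hard-coded mantissa widths (24/53) with FLT_MANT_DIG/DBL_MANT_DIG and makes
floatdidf, floatdisf and floatundisf use a clz64 helper added to types.h in
place of __builtin_clzll.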
---
 libclc/riscv32/lib/compiler-rt/floatdidf.cl   | 16 ++++----
 libclc/riscv32/lib/compiler-rt/floatdisf.cl   | 16 ++++----
 libclc/riscv32/lib/compiler-rt/floatundidf.cl | 14 +++----
 libclc/riscv32/lib/compiler-rt/floatundisf.cl | 27 ++++++------
 libclc/riscv32/lib/compiler-rt/types.h        | 41 +++++++++++++++++++
 5 files changed, 79 insertions(+), 35 deletions(-)

diff --git a/libclc/riscv32/lib/compiler-rt/floatdidf.cl b/libclc/riscv32/lib/compiler-rt/floatdidf.cl
index f77cf99ffc23..4a988c564160 100644
--- a/libclc/riscv32/lib/compiler-rt/floatdidf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatdidf.cl
@@ -22,32 +22,32 @@ double __floatdidf(di_int a) {
   const unsigned N = sizeof(di_int) * 8;
   const di_int s = a >> (N - 1);
   a = (a ^ s) - s;
-  int sd = N - __builtin_clzll(a);
+  int sd = N - clz64(a);
   int e = sd - 1;
-  if (sd > 53) {
+  if (sd > DBL_MANT_DIG) {
 
     switch (sd) {
-    case 53 + 1:
+    case DBL_MANT_DIG + 1:
       a <<= 1;
       break;
-    case 53 + 2:
+    case DBL_MANT_DIG + 2:
       break;
     default:
-      a = ((du_int)a >> (sd - (53 + 2))) |
-          ((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0);
+      a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
     };
 
     a |= (a & 4) != 0;
     ++a;
     a >>= 2;
 
-    if (a & ((du_int)1 << 53)) {
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
       a >>= 1;
       ++e;
     }
 
   } else {
-    a <<= (53 - sd);
+    a <<= (DBL_MANT_DIG - sd);
   }
   double_bits fb;
   fb.u.s.high = ((su_int)s & 0x80000000) | ((su_int)(e + 1023) << 20) |
diff --git a/libclc/riscv32/lib/compiler-rt/floatdisf.cl b/libclc/riscv32/lib/compiler-rt/floatdisf.cl
index 8cf423efedc9..c4c5848a3b9a 100644
--- a/libclc/riscv32/lib/compiler-rt/floatdisf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatdisf.cl
@@ -22,32 +22,32 @@ float __floatdisf(di_int a) {
   const unsigned N = sizeof(di_int) * 8;
   const di_int s = a >> (N - 1);
   a = (a ^ s) - s;
-  int sd = N - __builtin_clzll(a);
+  int sd = N - clz64(a);
   si_int e = sd - 1;
-  if (sd > 24) {
+  if (sd > FLT_MANT_DIG) {
 
     switch (sd) {
-    case 24 + 1:
+    case FLT_MANT_DIG + 1:
       a <<= 1;
       break;
-    case 24 + 2:
+    case FLT_MANT_DIG + 2:
       break;
     default:
-      a = ((du_int)a >> (sd - (24 + 2))) |
-          ((a & ((du_int)(-1) >> ((N + 24 + 2) - sd))) != 0);
+      a = ((du_int)a >> (sd - (FLT_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0);
     };
 
     a |= (a & 4) != 0;
     ++a;
     a >>= 2;
 
-    if (a & ((du_int)1 << 24)) {
+    if (a & ((du_int)1 << FLT_MANT_DIG)) {
       a >>= 1;
       ++e;
     }
 
   } else {
-    a <<= (24 - sd);
+    a <<= (FLT_MANT_DIG - sd);
   }
   float_bits fb;
   fb.u =
diff --git a/libclc/riscv32/lib/compiler-rt/floatundidf.cl b/libclc/riscv32/lib/compiler-rt/floatundidf.cl
index 3f713b823c7b..051cebdd936e 100644
--- a/libclc/riscv32/lib/compiler-rt/floatundidf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatundidf.cl
@@ -22,30 +22,30 @@ double __floatundidf(du_int a) {
   const unsigned N = sizeof(du_int) * 8;
   int sd = N - __builtin_clzll(a);
   int e = sd - 1;
-  if (sd > 53) {
+  if (sd > DBL_MANT_DIG) {
 
     switch (sd) {
-    case 53 + 1:
+    case DBL_MANT_DIG + 1:
       a <<= 1;
       break;
-    case 53 + 2:
+    case DBL_MANT_DIG + 2:
       break;
     default:
-      a = (a >> (sd - (53 + 2))) |
-          ((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0);
+      a = (a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
     };
 
     a |= (a & 4) != 0;
     ++a;
     a >>= 2;
 
-    if (a & ((du_int)1 << 53)) {
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
       a >>= 1;
       ++e;
     }
 
   } else {
-    a <<= (53 - sd);
+    a <<= (DBL_MANT_DIG - sd);
   }
   double_bits fb;
   fb.u.s.high = ((su_int)(e + 1023) << 20) | ((su_int)(a >> 32) & 0x000FFFFF);
diff --git a/libclc/riscv32/lib/compiler-rt/floatundisf.cl b/libclc/riscv32/lib/compiler-rt/floatundisf.cl
index f957637383d9..e72295471c52 100644
--- a/libclc/riscv32/lib/compiler-rt/floatundisf.cl
+++ b/libclc/riscv32/lib/compiler-rt/floatundisf.cl
@@ -17,18 +17,19 @@
 
 // seee eeee emmm mmmm mmmm mmmm mmmm mmmm
 
-typedef union {
-  int u;
-  float f;
-} float_bits;
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#include "types.h"
 
 float __floatundisf(long a) {
   if (a == 0)
     return 0.0F;
   const unsigned N = sizeof(long) * 8;
-  int sd = N - __builtin_clzll(a); // number of significant digits
+  int sd = N - clz64(a); // number of significant digits
   int e = sd - 1;               // 8 exponent
-  if (sd > 24) {
+  if (sd > FLT_MANT_DIG) {
     //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
     //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
     //                                                12345678901234567890123456
@@ -37,27 +38,27 @@ float __floatundisf(long a) {
     //  Q = bit FLT_MANT_DIG bits to the right of 1
     //  R = "or" of all bits to the right of Q
     switch (sd) {
-    case 24 + 1:
+    case FLT_MANT_DIG + 1:
       a <<= 1;
       break;
-    case 24 + 2:
+    case FLT_MANT_DIG + 2:
       break;
     default:
-      a = (a >> (sd - (24 + 2))) |
-          ((a & ((long)(-1) >> ((N + 24+ 2) - sd))) != 0);
+      a = (a >> (sd - (FLT_MANT_DIG + 2))) |
+          ((a & ((long)(-1) >> ((N + FLT_MANT_DIG+ 2) - sd))) != 0);
     };
     // finish:
     a |= (a & 4) != 0; // Or P into R
     ++a;               // round - this step may add a significant bit
     a >>= 2;           // dump Q and R
     // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits
-    if (a & ((long)1 << 24)) {
+    if (a & ((long)1 << FLT_MANT_DIG)) {
       a >>= 1;
       ++e;
     }
     // a is now rounded to FLT_MANT_DIG bits
   } else {
-    a <<= (24 - sd);
+    a <<= (FLT_MANT_DIG - sd);
     // a is now rounded to FLT_MANT_DIG bits
   }
   float_bits fb;
@@ -65,3 +66,5 @@ float __floatundisf(long a) {
          ((unsigned)a & 0x007FFFFF); // mantissa
   return fb.f;
 }
+
+#endif
diff --git a/libclc/riscv32/lib/compiler-rt/types.h b/libclc/riscv32/lib/compiler-rt/types.h
index d3734eb27a93..9c2dc1994361 100644
--- a/libclc/riscv32/lib/compiler-rt/types.h
+++ b/libclc/riscv32/lib/compiler-rt/types.h
@@ -1,6 +1,8 @@
 #ifndef TYPES_H
 #define TYPES_H
 
+#include <float.h>
+
 typedef char char2  __attribute__((__ext_vector_type__(2)));
 typedef char char3  __attribute__((__ext_vector_type__(3)));
 typedef char char4  __attribute__((__ext_vector_type__(4)));
@@ -525,4 +527,43 @@ do {                                                            \
   (d) = __u.value;                                              \
 } while (0)
 
+
+// Count the leading zero bits of x (treated as 64-bit), scanning from bit 63 down.
+// Returns 64 when x is 0 so the result differs from the "bit 63 set" case.
+static int __attribute__((noinline)) clzl(unsigned long x)
+{
+    for (int i = 0; i != 64; ++i)
+        if ((x >> (63 - i)) & 1)
+            return i; // i zero bits sit above the highest set bit
+    return 64; // no bit set at all
+}
+
+// Count trailing zero bits of x (treated as 64-bit); returns 64 for x == 0.
+static int ctz64(unsigned long x)
+{
+  int r = 63;
+  if (x == 0) return 64; // no set bit; 63 would collide with "only bit 63 set"
+  x &= ~x + 1; // keep only the lowest set bit (same as x & -x)
+  if (x & 0x00000000FFFFFFFF) r -= 32; // each mask covers the bit positions
+  if (x & 0x0000FFFF0000FFFF) r -= 16; // whose index has that power of two clear
+  if (x & 0x00FF00FF00FF00FF) r -= 8;
+  if (x & 0x0F0F0F0F0F0F0F0F) r -= 4;
+  if (x & 0x3333333333333333) r -= 2;
+  if (x & 0x5555555555555555) r -= 1;
+  return r;
+}
+
+// Count leading zero bits of x (treated as 64-bit) by halving; 64 for x == 0.
+static int clz64(unsigned long x) {
+  int r = 0;
+  if (x == 0) return 64; // otherwise the steps below would add up to 63
+  if ((x & 0xFFFFFFFF00000000) == 0) r += 32, x <<= 32;
+  if ((x & 0xFFFF000000000000) == 0) r += 16, x <<= 16;
+  if ((x & 0xFF00000000000000) == 0) r += 8,  x <<= 8;
+  if ((x & 0xF000000000000000) == 0) r += 4,  x <<= 4;
+  if ((x & 0xC000000000000000) == 0) r += 2,  x <<= 2;
+  if ((x & 0x8000000000000000) == 0) r += 1,  x <<= 1;
+  return r; // total width of the all-zero top chunks shifted out above
+}
+
 #endif // TYPES_H