From dfd2affa513164da8d6128be014ddc92aa79c31e Mon Sep 17 00:00:00 2001 From: zhoujingya Date: Mon, 30 Oct 2023 10:59:59 +0800 Subject: [PATCH] [VENTUS][fix] Fix float precision issue in libclc for ventus There are many potential precision bugs in libclc, especially the functions under `libclc/riscv32/lib/compiler-rt` directory. --- libclc/riscv32/lib/compiler-rt/floatdidf.cl | 16 ++++---- libclc/riscv32/lib/compiler-rt/floatdisf.cl | 16 ++++---- libclc/riscv32/lib/compiler-rt/floatundidf.cl | 14 +++---- libclc/riscv32/lib/compiler-rt/floatundisf.cl | 27 ++++++------ libclc/riscv32/lib/compiler-rt/types.h | 41 +++++++++++++++++++ 5 files changed, 79 insertions(+), 35 deletions(-) diff --git a/libclc/riscv32/lib/compiler-rt/floatdidf.cl b/libclc/riscv32/lib/compiler-rt/floatdidf.cl index f77cf99ffc23..4a988c564160 100644 --- a/libclc/riscv32/lib/compiler-rt/floatdidf.cl +++ b/libclc/riscv32/lib/compiler-rt/floatdidf.cl @@ -22,32 +22,32 @@ double __floatdidf(di_int a) { const unsigned N = sizeof(di_int) * 8; const di_int s = a >> (N - 1); a = (a ^ s) - s; - int sd = N - __builtin_clzll(a); + int sd = N - clz64(a); int e = sd - 1; - if (sd > 53) { + if (sd > DBL_MANT_DIG) { switch (sd) { - case 53 + 1: + case DBL_MANT_DIG + 1: a <<= 1; break; - case 53 + 2: + case DBL_MANT_DIG + 2: break; default: - a = ((du_int)a >> (sd - (53 + 2))) | - ((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0); + a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); }; a |= (a & 4) != 0; ++a; a >>= 2; - if (a & ((du_int)1 << 53)) { + if (a & ((du_int)1 << DBL_MANT_DIG)) { a >>= 1; ++e; } } else { - a <<= (53 - sd); + a <<= (DBL_MANT_DIG - sd); } double_bits fb; fb.u.s.high = ((su_int)s & 0x80000000) | ((su_int)(e + 1023) << 20) | diff --git a/libclc/riscv32/lib/compiler-rt/floatdisf.cl b/libclc/riscv32/lib/compiler-rt/floatdisf.cl index 8cf423efedc9..c4c5848a3b9a 100644 --- a/libclc/riscv32/lib/compiler-rt/floatdisf.cl +++ 
b/libclc/riscv32/lib/compiler-rt/floatdisf.cl @@ -22,32 +22,32 @@ float __floatdisf(di_int a) { const unsigned N = sizeof(di_int) * 8; const di_int s = a >> (N - 1); a = (a ^ s) - s; - int sd = N - __builtin_clzll(a); + int sd = N - clz64(a); si_int e = sd - 1; - if (sd > 24) { + if (sd > FLT_MANT_DIG) { switch (sd) { - case 24 + 1: + case FLT_MANT_DIG + 1: a <<= 1; break; - case 24 + 2: + case FLT_MANT_DIG + 2: break; default: - a = ((du_int)a >> (sd - (24 + 2))) | - ((a & ((du_int)(-1) >> ((N + 24 + 2) - sd))) != 0); + a = ((du_int)a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0); }; a |= (a & 4) != 0; ++a; a >>= 2; - if (a & ((du_int)1 << 24)) { + if (a & ((du_int)1 << FLT_MANT_DIG)) { a >>= 1; ++e; } } else { - a <<= (24 - sd); + a <<= (FLT_MANT_DIG - sd); } float_bits fb; fb.u = diff --git a/libclc/riscv32/lib/compiler-rt/floatundidf.cl b/libclc/riscv32/lib/compiler-rt/floatundidf.cl index 3f713b823c7b..051cebdd936e 100644 --- a/libclc/riscv32/lib/compiler-rt/floatundidf.cl +++ b/libclc/riscv32/lib/compiler-rt/floatundidf.cl @@ -22,30 +22,30 @@ double __floatundidf(du_int a) { const unsigned N = sizeof(du_int) * 8; int sd = N - __builtin_clzll(a); int e = sd - 1; - if (sd > 53) { + if (sd > DBL_MANT_DIG) { switch (sd) { - case 53 + 1: + case DBL_MANT_DIG + 1: a <<= 1; break; - case 53 + 2: + case DBL_MANT_DIG + 2: break; default: - a = (a >> (sd - (53 + 2))) | - ((a & ((du_int)(-1) >> ((N + 53 + 2) - sd))) != 0); + a = (a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); }; a |= (a & 4) != 0; ++a; a >>= 2; - if (a & ((du_int)1 << 53)) { + if (a & ((du_int)1 << DBL_MANT_DIG)) { a >>= 1; ++e; } } else { - a <<= (53 - sd); + a <<= (DBL_MANT_DIG - sd); } double_bits fb; fb.u.s.high = ((su_int)(e + 1023) << 20) | ((su_int)(a >> 32) & 0x000FFFFF); diff --git a/libclc/riscv32/lib/compiler-rt/floatundisf.cl b/libclc/riscv32/lib/compiler-rt/floatundisf.cl index 
f957637383d9..e72295471c52 100644 --- a/libclc/riscv32/lib/compiler-rt/floatundisf.cl +++ b/libclc/riscv32/lib/compiler-rt/floatundisf.cl @@ -17,18 +17,19 @@ // seee eeee emmm mmmm mmmm mmmm mmmm mmmm -typedef union { - int u; - float f; -} float_bits; +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +#include "types.h" float __floatundisf(long a) { if (a == 0) return 0.0F; const unsigned N = sizeof(long) * 8; - int sd = N - __builtin_clzll(a); // number of significant digits + int sd = N - clz64(a); // number of significant digits int e = sd - 1; // 8 exponent - if (sd > 24) { + if (sd > FLT_MANT_DIG) { // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR // 12345678901234567890123456 @@ -37,27 +38,27 @@ float __floatundisf(long a) { // Q = bit FLT_MANT_DIG bits to the right of 1 // R = "or" of all bits to the right of Q switch (sd) { - case 24 + 1: + case FLT_MANT_DIG + 1: a <<= 1; break; - case 24 + 2: + case FLT_MANT_DIG + 2: break; default: - a = (a >> (sd - (24 + 2))) | - ((a & ((long)(-1) >> ((N + 24+ 2) - sd))) != 0); + a = (a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((long)(-1) >> ((N + FLT_MANT_DIG+ 2) - sd))) != 0); }; // finish: a |= (a & 4) != 0; // Or P into R ++a; // round - this step may add a significant bit a >>= 2; // dump Q and R // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits - if (a & ((long)1 << 24)) { + if (a & ((long)1 << FLT_MANT_DIG)) { a >>= 1; ++e; } // a is now rounded to FLT_MANT_DIG bits } else { - a <<= (24 - sd); + a <<= (FLT_MANT_DIG - sd); // a is now rounded to FLT_MANT_DIG bits } float_bits fb; @@ -65,3 +66,5 @@ float __floatundisf(long a) { ((unsigned)a & 0x007FFFFF); // mantissa return fb.f; } + +#endif diff --git a/libclc/riscv32/lib/compiler-rt/types.h b/libclc/riscv32/lib/compiler-rt/types.h index d3734eb27a93..9c2dc1994361 100644 --- a/libclc/riscv32/lib/compiler-rt/types.h +++ 
/*
 * Software bit-counting helpers introduced by the types.h hunk
 * (@@ -525,4 +527,43 @@) of this patch.
 *
 * Every helper takes a 64-bit value; the masks and shift counts below
 * assume `unsigned long` is exactly 64 bits wide.  A zero argument has no
 * set bit at all, so each helper reports the full width (64) for it.  This
 * keeps the zero case distinguishable from every non-zero input.
 */

/*
 * clzl - count leading zero bits by scanning from bit 63 downwards.
 *
 * x: value to inspect.
 * Returns the number of zero bits above the highest set bit (0..63),
 * or 64 when x is 0.
 */
static int __attribute__((noinline)) clzl(unsigned long x)
{
  for (int i = 0; i != 64; ++i) {
    if ((x >> (63 - i)) & 1) {
      return i;
    }
  }

  /* No bit was found: all 64 positions are leading zeros. */
  return 64;
}

/*
 * ctz64 - count trailing zero bits with a branch-per-level binary search.
 *
 * x: value to inspect.
 * Returns the index of the lowest set bit (0..63), or 64 when x is 0.
 */
static int ctz64(unsigned long x)
{
  int r = 63;

  /* Without this guard a zero input would fall through and report 63,
   * the same answer as for a value whose only set bit is bit 63. */
  if (x == 0) {
    return 64;
  }

  /* Isolate the lowest set bit (x & -x). */
  x &= ~x + 1;

  /* Each mask keeps the lower half of every 2^k-bit group; a hit means
   * the isolated bit sits in a lower half, so its index is smaller. */
  if (x & 0x00000000FFFFFFFF) r -= 32;
  if (x & 0x0000FFFF0000FFFF) r -= 16;
  if (x & 0x00FF00FF00FF00FF) r -= 8;
  if (x & 0x0F0F0F0F0F0F0F0F) r -= 4;
  if (x & 0x3333333333333333) r -= 2;
  if (x & 0x5555555555555555) r -= 1;

  return r;
}

/*
 * clz64 - count leading zero bits with a shift-and-test binary search.
 *
 * x: value to inspect.
 * Returns the number of zero bits above the highest set bit (0..63),
 * or 64 when x is 0.
 */
static int clz64(unsigned long x)
{
  int r = 0;

  /* Without this guard the six steps below would sum to 63 for a zero
   * input, colliding with the result for x == 1. */
  if (x == 0) {
    return 64;
  }

  /* Whenever the top 32/16/8/4/2/1 bits are clear, account for them and
   * shift the remaining bits up to the top for the next test. */
  if ((x & 0xFFFFFFFF00000000) == 0) { r += 32; x <<= 32; }
  if ((x & 0xFFFF000000000000) == 0) { r += 16; x <<= 16; }
  if ((x & 0xFF00000000000000) == 0) { r += 8;  x <<= 8;  }
  if ((x & 0xF000000000000000) == 0) { r += 4;  x <<= 4;  }
  if ((x & 0xC000000000000000) == 0) { r += 2;  x <<= 2;  }
  if ((x & 0x8000000000000000) == 0) { r += 1; }

  return r;
}