forked from OSchip/llvm-project
[builtins] Make __umodsi3/__udivdi3/__umoddi3 standalone (shift and subtract)
@kamleshbhalui reported that when the Standard Extension M (Multiplication and Division) is disabled for RISC-V, `__udivdi3` will call __udivmodti4 which will in turn calls `__udivdi3`. This patch moves __udivsi3 (shift and subtract) to int_div_impl.inc `__udivXi3`, optimize a bit, add a `__umodXi3`, and use `__udivXi3` and `__umodXi3` to define `__udivsi3` `__umodsi3` `__udivdi3` `__umoddi3`. Reviewed By: kamleshbhalui Differential Revision: https://reviews.llvm.org/D77912
This commit is contained in:
parent
b34c9049cb
commit
b541196eb4
|
@ -0,0 +1,58 @@
|
|||
#define clz(a) (sizeof(a) == 8 ? __builtin_clzll(a) : __builtin_clz(a))
|
||||
|
||||
// Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide
|
||||
static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) {
|
||||
const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
|
||||
// d == 0 cases are unspecified.
|
||||
unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
|
||||
// 0 <= sr <= N - 1 or sr is very large.
|
||||
if (sr > N - 1) // n < d
|
||||
return 0;
|
||||
if (sr == N - 1) // d == 1
|
||||
return n;
|
||||
++sr;
|
||||
// 1 <= sr <= N - 1. Shifts do not trigger UB.
|
||||
fixuint_t r = n >> sr;
|
||||
n <<= N - sr;
|
||||
fixuint_t carry = 0;
|
||||
for (; sr > 0; --sr) {
|
||||
r = (r << 1) | (n >> (N - 1));
|
||||
n = (n << 1) | carry;
|
||||
// Branch-less version of:
|
||||
// carry = 0;
|
||||
// if (r >= d) r -= d, carry = 1;
|
||||
const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1);
|
||||
carry = s & 1;
|
||||
r -= d & s;
|
||||
}
|
||||
n = (n << 1) | carry;
|
||||
return n;
|
||||
}
|
||||
|
||||
// Mostly identical to __udivXi3 but the return values are different.
|
||||
static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) {
|
||||
const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
|
||||
// d == 0 cases are unspecified.
|
||||
unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
|
||||
// 0 <= sr <= N - 1 or sr is very large.
|
||||
if (sr > N - 1) // n < d
|
||||
return n;
|
||||
if (sr == N - 1) // d == 1
|
||||
return 0;
|
||||
++sr;
|
||||
// 1 <= sr <= N - 1. Shifts do not trigger UB.
|
||||
fixuint_t r = n >> sr;
|
||||
n <<= N - sr;
|
||||
fixuint_t carry = 0;
|
||||
for (; sr > 0; --sr) {
|
||||
r = (r << 1) | (n >> (N - 1));
|
||||
n = (n << 1) | carry;
|
||||
// Branch-less version of:
|
||||
// carry = 0;
|
||||
// if (r >= d) r -= d, carry = 1;
|
||||
const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1);
|
||||
carry = s & 1;
|
||||
r -= d & s;
|
||||
}
|
||||
return r;
|
||||
}
|
|
@ -12,8 +12,12 @@
|
|||
|
||||
#include "int_lib.h"
|
||||
|
||||
typedef du_int fixuint_t;
|
||||
typedef di_int fixint_t;
|
||||
#include "int_div_impl.inc"
|
||||
|
||||
// Returns: a / b
|
||||
|
||||
COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
|
||||
return __udivmoddi4(a, b, 0);
|
||||
return __udivXi3(a, b);
|
||||
}
|
||||
|
|
|
@ -12,49 +12,14 @@
|
|||
|
||||
#include "int_lib.h"
|
||||
|
||||
typedef su_int fixuint_t;
|
||||
typedef si_int fixint_t;
|
||||
#include "int_div_impl.inc"
|
||||
|
||||
// Returns: a / b
|
||||
|
||||
// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
|
||||
|
||||
// This function should not call __divsi3!
|
||||
COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) {
|
||||
const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
|
||||
su_int q;
|
||||
su_int r;
|
||||
unsigned sr;
|
||||
// special cases
|
||||
if (d == 0)
|
||||
return 0; // ?!
|
||||
if (n == 0)
|
||||
return 0;
|
||||
sr = __builtin_clz(d) - __builtin_clz(n);
|
||||
// 0 <= sr <= n_uword_bits - 1 or sr large
|
||||
if (sr > n_uword_bits - 1) // d > r
|
||||
return 0;
|
||||
if (sr == n_uword_bits - 1) // d == 1
|
||||
return n;
|
||||
++sr;
|
||||
// 1 <= sr <= n_uword_bits - 1
|
||||
// Not a special case
|
||||
q = n << (n_uword_bits - sr);
|
||||
r = n >> sr;
|
||||
su_int carry = 0;
|
||||
for (; sr > 0; --sr) {
|
||||
// r:q = ((r:q) << 1) | carry
|
||||
r = (r << 1) | (q >> (n_uword_bits - 1));
|
||||
q = (q << 1) | carry;
|
||||
// carry = 0;
|
||||
// if (r.all >= d.all)
|
||||
// {
|
||||
// r.all -= d.all;
|
||||
// carry = 1;
|
||||
// }
|
||||
const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1);
|
||||
carry = s & 1;
|
||||
r -= d & s;
|
||||
}
|
||||
q = (q << 1) | carry;
|
||||
return q;
|
||||
COMPILER_RT_ABI su_int __udivsi3(su_int a, su_int b) {
|
||||
return __udivXi3(a, b);
|
||||
}
|
||||
|
||||
#if defined(__ARM_EABI__)
|
||||
|
|
|
@ -12,10 +12,12 @@
|
|||
|
||||
#include "int_lib.h"
|
||||
|
||||
typedef du_int fixuint_t;
|
||||
typedef di_int fixint_t;
|
||||
#include "int_div_impl.inc"
|
||||
|
||||
// Returns: a % b
|
||||
|
||||
COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
|
||||
du_int r;
|
||||
__udivmoddi4(a, b, &r);
|
||||
return r;
|
||||
return __umodXi3(a, b);
|
||||
}
|
||||
|
|
|
@ -12,8 +12,12 @@
|
|||
|
||||
#include "int_lib.h"
|
||||
|
||||
typedef su_int fixuint_t;
|
||||
typedef si_int fixint_t;
|
||||
#include "int_div_impl.inc"
|
||||
|
||||
// Returns: a % b
|
||||
|
||||
COMPILER_RT_ABI su_int __umodsi3(su_int a, su_int b) {
|
||||
return a - __udivsi3(a, b) * b;
|
||||
return __umodXi3(a, b);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue