Extend `performANDCSELCombine` to `performANDORCSELCombine`
Differential Revision: https://reviews.llvm.org/D120422
This commit is contained in:
parent
6467d1d275
commit
43a0016f3d
|
@ -14034,15 +14034,85 @@ static SDValue tryCombineToBSL(SDNode *N,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
|
||||
// convert to csel(ccmp(.., cc0)), depending on cc1:
|
||||
|
||||
// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
|
||||
// =>
|
||||
// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
|
||||
//
|
||||
// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
|
||||
// =>
|
||||
// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
|
||||
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue CSel0 = N->getOperand(0);
|
||||
SDValue CSel1 = N->getOperand(1);
|
||||
|
||||
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
|
||||
CSel1.getOpcode() != AArch64ISD::CSEL)
|
||||
return SDValue();
|
||||
|
||||
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
if (!isNullConstant(CSel0.getOperand(0)) ||
|
||||
!isOneConstant(CSel0.getOperand(1)) ||
|
||||
!isNullConstant(CSel1.getOperand(0)) ||
|
||||
!isOneConstant(CSel1.getOperand(1)))
|
||||
return SDValue();
|
||||
|
||||
SDValue Cmp0 = CSel0.getOperand(3);
|
||||
SDValue Cmp1 = CSel1.getOperand(3);
|
||||
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
|
||||
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
|
||||
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
|
||||
return SDValue();
|
||||
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
|
||||
Cmp0.getOpcode() == AArch64ISD::SUBS) {
|
||||
std::swap(Cmp0, Cmp1);
|
||||
std::swap(CC0, CC1);
|
||||
}
|
||||
|
||||
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue CCmp;
|
||||
|
||||
if (N->getOpcode() == ISD::AND) {
|
||||
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
|
||||
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
|
||||
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
|
||||
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
|
||||
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
|
||||
} else {
|
||||
SDLoc DL(N);
|
||||
AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
|
||||
SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
|
||||
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
|
||||
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
|
||||
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
|
||||
}
|
||||
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
|
||||
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
|
||||
CCmp);
|
||||
}
|
||||
|
||||
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (SDValue R = performANDORCSELCombine(N, DAG))
|
||||
return R;
|
||||
|
||||
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
||||
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
|
||||
if (SDValue Res = tryCombineToEXTR(N, DCI))
|
||||
return Res;
|
||||
|
||||
|
@ -14171,60 +14241,13 @@ static SDValue performSVEAndCombine(SDNode *N,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Given a tree of and(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
|
||||
// convert to csel(ccmp(.., cc0)), depending on cc1.
|
||||
static SDValue PerformANDCSELCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue CSel0 = N->getOperand(0);
|
||||
SDValue CSel1 = N->getOperand(1);
|
||||
|
||||
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
|
||||
CSel1.getOpcode() != AArch64ISD::CSEL)
|
||||
return SDValue();
|
||||
|
||||
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
if (!isNullConstant(CSel0.getOperand(0)) ||
|
||||
!isOneConstant(CSel0.getOperand(1)) ||
|
||||
!isNullConstant(CSel1.getOperand(0)) ||
|
||||
!isOneConstant(CSel1.getOperand(1)))
|
||||
return SDValue();
|
||||
|
||||
SDValue Cmp0 = CSel0.getOperand(3);
|
||||
SDValue Cmp1 = CSel1.getOperand(3);
|
||||
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
|
||||
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
|
||||
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
|
||||
return SDValue();
|
||||
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
|
||||
Cmp0.getOpcode() == AArch64ISD::SUBS) {
|
||||
std::swap(Cmp0, Cmp1);
|
||||
std::swap(CC0, CC1);
|
||||
}
|
||||
|
||||
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(N);
|
||||
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
|
||||
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
|
||||
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
|
||||
SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
|
||||
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
|
||||
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
|
||||
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
|
||||
CCmp);
|
||||
}
|
||||
|
||||
static SDValue performANDCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDValue LHS = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (SDValue R = PerformANDCSELCombine(N, DAG))
|
||||
if (SDValue R = performANDORCSELCombine(N, DAG))
|
||||
return R;
|
||||
|
||||
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||
|
|
|
@ -754,16 +754,12 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
|
|||
|
||||
@g = global i32 0
|
||||
|
||||
; Should not use ccmp if we have to compute the or expression in an integer
|
||||
; register anyway because of other users.
|
||||
define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
|
||||
; CHECK-LABEL: select_noccmp2:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: cmp x0, #0
|
||||
; CHECK-NEXT: cset w8, lt
|
||||
; CHECK-NEXT: cmp x0, #13
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: orr w8, w8, w9
|
||||
; CHECK-NEXT: ccmp x0, #13, #0, ge
|
||||
; CHECK-NEXT: cset w8, gt
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel x0, xzr, x3, ne
|
||||
; CHECK-NEXT: sbfx w8, w8, #0, #1
|
||||
|
@ -799,21 +795,17 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
|
|||
; CHECK-LABEL: select_noccmp3:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: cmp w0, #0
|
||||
; CHECK-NEXT: cset w8, lt
|
||||
; CHECK-NEXT: cmp w0, #13
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: ccmp w0, #13, #0, ge
|
||||
; CHECK-NEXT: cset w8, gt
|
||||
; CHECK-NEXT: cmp w0, #22
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp w0, #44
|
||||
; CHECK-NEXT: cset w11, gt
|
||||
; CHECK-NEXT: mov w9, #44
|
||||
; CHECK-NEXT: ccmp w0, w9, #0, ge
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: cmp w0, #99
|
||||
; CHECK-NEXT: cset w12, eq
|
||||
; CHECK-NEXT: cmp w0, #77
|
||||
; CHECK-NEXT: cset w13, eq
|
||||
; CHECK-NEXT: orr w8, w8, w9
|
||||
; CHECK-NEXT: orr w9, w10, w11
|
||||
; CHECK-NEXT: and w8, w8, w9
|
||||
; CHECK-NEXT: orr w9, w12, w13
|
||||
; CHECK-NEXT: mov w9, #77
|
||||
; CHECK-NEXT: ccmp w0, w9, #4, ne
|
||||
; CHECK-NEXT: cset w9, eq
|
||||
; CHECK-NEXT: tst w8, w9
|
||||
; CHECK-NEXT: csel w0, w1, w2, ne
|
||||
; CHECK-NEXT: ret
|
||||
|
|
|
@ -257,13 +257,12 @@ define dso_local i1 @test_setcc3() {
|
|||
; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
|
||||
; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
|
||||
; CHECK-NEXT: bl __eqtf2
|
||||
; CHECK-NEXT: cmp w0, #0
|
||||
; CHECK-NEXT: cset w19, eq
|
||||
; CHECK-NEXT: mov x19, x0
|
||||
; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
|
||||
; CHECK-NEXT: bl __unordtf2
|
||||
; CHECK-NEXT: cmp w0, #0
|
||||
; CHECK-NEXT: cset w8, ne
|
||||
; CHECK-NEXT: orr w0, w8, w19
|
||||
; CHECK-NEXT: ccmp w19, #0, #4, eq
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: add sp, sp, #48
|
||||
; CHECK-NEXT: ret
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
|
||||
|
||||
; Ensure chains of comparisons produce chains of `ccmp`
|
||||
|
||||
; (x0 < x1) && (x2 > x3)
|
||||
define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
|
||||
; CHECK-LABEL: cmp_and2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: ccmp w2, w3, #0, lo
|
||||
; CHECK-NEXT: cset w0, hi
|
||||
; CHECK-NEXT: ret
|
||||
%5 = icmp ult i32 %0, %1
|
||||
%6 = icmp ugt i32 %2, %3
|
||||
%7 = select i1 %5, i1 %6, i1 false
|
||||
%8 = zext i1 %7 to i32
|
||||
ret i32 %8
|
||||
}
|
||||
|
||||
; (x0 < x1) && (x2 > x3) && (x4 != x5)
|
||||
define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
|
||||
; CHECK-LABEL: cmp_and3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: ccmp w2, w3, #0, lo
|
||||
; CHECK-NEXT: ccmp w4, w5, #4, hi
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%7 = icmp ult i32 %0, %1
|
||||
%8 = icmp ugt i32 %2, %3
|
||||
%9 = select i1 %7, i1 %8, i1 false
|
||||
%10 = icmp ne i32 %4, %5
|
||||
%11 = select i1 %9, i1 %10, i1 false
|
||||
%12 = zext i1 %11 to i32
|
||||
ret i32 %12
|
||||
}
|
||||
|
||||
; (x2 > x3) && (x0 < x1) && (x4 != x5) && (x6 == x7)
|
||||
define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
|
||||
; CHECK-LABEL: cmp_and4:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w2, w3
|
||||
; CHECK-NEXT: ccmp w0, w1, #2, hi
|
||||
; CHECK-NEXT: ccmp w4, w5, #4, lo
|
||||
; CHECK-NEXT: ccmp w6, w7, #0, ne
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: ret
|
||||
%9 = icmp ugt i32 %2, %3
|
||||
%10 = icmp ult i32 %0, %1
|
||||
%11 = select i1 %9, i1 %10, i1 false
|
||||
%12 = icmp ne i32 %4, %5
|
||||
%13 = select i1 %11, i1 %12, i1 false
|
||||
%14 = icmp eq i32 %6, %7
|
||||
%15 = select i1 %13, i1 %14, i1 false
|
||||
%16 = zext i1 %15 to i32
|
||||
ret i32 %16
|
||||
}
|
||||
|
||||
; (x0 < x1) || (x2 != x3)
|
||||
define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
|
||||
; CHECK-LABEL: cmp_or2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: ccmp w2, w3, #0, hs
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%5 = icmp ult i32 %0, %1
|
||||
%6 = icmp ne i32 %2, %3
|
||||
%7 = select i1 %5, i1 true, i1 %6
|
||||
%8 = zext i1 %7 to i32
|
||||
ret i32 %8
|
||||
}
|
||||
|
||||
; (x0 < x1) || (x2 > x3) || (x4 != x5)
|
||||
define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
|
||||
; CHECK-LABEL: cmp_or3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: ccmp w2, w3, #2, hs
|
||||
; CHECK-NEXT: ccmp w4, w5, #0, ls
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%7 = icmp ult i32 %0, %1
|
||||
%8 = icmp ugt i32 %2, %3
|
||||
%9 = select i1 %7, i1 true, i1 %8
|
||||
%10 = icmp ne i32 %4, %5
|
||||
%11 = select i1 %9, i1 true, i1 %10
|
||||
%12 = zext i1 %11 to i32
|
||||
ret i32 %12
|
||||
}
|
||||
|
||||
; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
|
||||
define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
|
||||
; CHECK-LABEL: cmp_or4:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: ccmp w2, w3, #2, hs
|
||||
; CHECK-NEXT: ccmp w4, w5, #0, ls
|
||||
; CHECK-NEXT: ccmp w6, w7, #4, eq
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: ret
|
||||
%9 = icmp ult i32 %0, %1
|
||||
%10 = icmp ugt i32 %2, %3
|
||||
%11 = select i1 %9, i1 true, i1 %10
|
||||
%12 = icmp ne i32 %4, %5
|
||||
%13 = select i1 %11, i1 true, i1 %12
|
||||
%14 = icmp eq i32 %6, %7
|
||||
%15 = select i1 %13, i1 true, i1 %14
|
||||
%16 = zext i1 %15 to i32
|
||||
ret i32 %16
|
||||
}
|
||||
|
||||
; (x0 != 0) || (x1 != 0)
|
||||
define i32 @true_or2(i32 %0, i32 %1) {
|
||||
; CHECK-LABEL: true_or2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: orr w8, w0, w1
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%3 = icmp ne i32 %0, 0
|
||||
%4 = icmp ne i32 %1, 0
|
||||
%5 = select i1 %3, i1 true, i1 %4
|
||||
%6 = zext i1 %5 to i32
|
||||
ret i32 %6
|
||||
}
|
||||
|
||||
; (x0 != 0) || (x1 != 0) || (x2 != 0)
|
||||
define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
|
||||
; CHECK-LABEL: true_or3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: orr w8, w0, w1
|
||||
; CHECK-NEXT: orr w8, w8, w2
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%4 = icmp ne i32 %0, 0
|
||||
%5 = icmp ne i32 %1, 0
|
||||
%6 = select i1 %4, i1 true, i1 %5
|
||||
%7 = icmp ne i32 %2, 0
|
||||
%8 = select i1 %6, i1 true, i1 %7
|
||||
%9 = zext i1 %8 to i32
|
||||
ret i32 %9
|
||||
}
|
|
@ -18,10 +18,8 @@ define i1 @or(i32 %x, i32 %y, i32 %z, i32 %w) {
|
|||
; CHECK-LABEL: or:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: cset w8, eq
|
||||
; CHECK-NEXT: cmp w2, w3
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: orr w0, w8, w9
|
||||
; CHECK-NEXT: ccmp w2, w3, #0, ne
|
||||
; CHECK-NEXT: cset w0, gt
|
||||
; CHECK-NEXT: ret
|
||||
%a = icmp eq i32 %x, %y
|
||||
%b = icmp sgt i32 %z, %w
|
||||
|
@ -46,10 +44,8 @@ define i1 @or_not(i32 %x, i32 %y, i32 %z, i32 %w) {
|
|||
; CHECK-LABEL: or_not:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmp w0, w1
|
||||
; CHECK-NEXT: cset w8, ne
|
||||
; CHECK-NEXT: cmp w2, w3
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: orr w0, w8, w9
|
||||
; CHECK-NEXT: ccmp w2, w3, #0, eq
|
||||
; CHECK-NEXT: cset w0, gt
|
||||
; CHECK-NEXT: ret
|
||||
%a = icmp eq i32 %x, %y
|
||||
%b = icmp sgt i32 %z, %w
|
||||
|
|
|
@ -4,24 +4,21 @@
|
|||
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
|
||||
; AARCH-LABEL: muloti_test:
|
||||
; AARCH: // %bb.0: // %start
|
||||
; AARCH-NEXT: umulh x8, x1, x2
|
||||
; AARCH-NEXT: mul x9, x3, x0
|
||||
; AARCH-NEXT: cmp xzr, x8
|
||||
; AARCH-NEXT: umulh x10, x3, x0
|
||||
; AARCH-NEXT: cset w8, ne
|
||||
; AARCH-NEXT: mul x8, x3, x0
|
||||
; AARCH-NEXT: umulh x9, x0, x2
|
||||
; AARCH-NEXT: madd x8, x1, x2, x8
|
||||
; AARCH-NEXT: umulh x10, x1, x2
|
||||
; AARCH-NEXT: adds x8, x9, x8
|
||||
; AARCH-NEXT: cset w9, hs
|
||||
; AARCH-NEXT: cmp x1, #0
|
||||
; AARCH-NEXT: ccmp x3, #0, #4, ne
|
||||
; AARCH-NEXT: madd x9, x1, x2, x9
|
||||
; AARCH-NEXT: cset w11, ne
|
||||
; AARCH-NEXT: cmp xzr, x10
|
||||
; AARCH-NEXT: umulh x10, x0, x2
|
||||
; AARCH-NEXT: orr w8, w11, w8
|
||||
; AARCH-NEXT: cset w11, ne
|
||||
; AARCH-NEXT: mov x1, x8
|
||||
; AARCH-NEXT: ccmp xzr, x10, #0, eq
|
||||
; AARCH-NEXT: umulh x10, x3, x0
|
||||
; AARCH-NEXT: mul x0, x0, x2
|
||||
; AARCH-NEXT: adds x1, x10, x9
|
||||
; AARCH-NEXT: orr w8, w8, w11
|
||||
; AARCH-NEXT: cset w9, hs
|
||||
; AARCH-NEXT: orr w2, w8, w9
|
||||
; AARCH-NEXT: ccmp xzr, x10, #0, eq
|
||||
; AARCH-NEXT: cset w10, ne
|
||||
; AARCH-NEXT: orr w2, w10, w9
|
||||
; AARCH-NEXT: ret
|
||||
start:
|
||||
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
|
||||
|
|
|
@ -322,48 +322,40 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
|
||||
; CHECK-LABEL: umulo_v2i128:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umulh x8, x3, x6
|
||||
; CHECK-NEXT: mul x10, x7, x2
|
||||
; CHECK-NEXT: cmp xzr, x8
|
||||
; CHECK-NEXT: umulh x8, x7, x2
|
||||
; CHECK-NEXT: cset w9, ne
|
||||
; CHECK-NEXT: mul x8, x7, x2
|
||||
; CHECK-NEXT: umulh x9, x2, x6
|
||||
; CHECK-NEXT: madd x8, x3, x6, x8
|
||||
; CHECK-NEXT: umulh x10, x3, x6
|
||||
; CHECK-NEXT: adds x8, x9, x8
|
||||
; CHECK-NEXT: umulh x11, x7, x2
|
||||
; CHECK-NEXT: cset w9, hs
|
||||
; CHECK-NEXT: cmp x3, #0
|
||||
; CHECK-NEXT: ccmp x7, #0, #4, ne
|
||||
; CHECK-NEXT: umulh x11, x2, x6
|
||||
; CHECK-NEXT: madd x10, x3, x6, x10
|
||||
; CHECK-NEXT: umulh x12, x1, x4
|
||||
; CHECK-NEXT: cset w13, ne
|
||||
; CHECK-NEXT: cmp xzr, x8
|
||||
; CHECK-NEXT: cset w8, ne
|
||||
; CHECK-NEXT: umulh x13, x1, x4
|
||||
; CHECK-NEXT: ccmp xzr, x10, #0, eq
|
||||
; CHECK-NEXT: mul x10, x5, x0
|
||||
; CHECK-NEXT: madd x10, x1, x4, x10
|
||||
; CHECK-NEXT: ccmp xzr, x11, #0, eq
|
||||
; CHECK-NEXT: umulh x11, x0, x4
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: adds x10, x11, x10
|
||||
; CHECK-NEXT: cset w11, hs
|
||||
; CHECK-NEXT: cmp xzr, x12
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: cmp x1, #0
|
||||
; CHECK-NEXT: ccmp x5, #0, #4, ne
|
||||
; CHECK-NEXT: mul x15, x5, x0
|
||||
; CHECK-NEXT: umulh x14, x5, x0
|
||||
; CHECK-NEXT: orr w9, w13, w9
|
||||
; CHECK-NEXT: umulh x16, x0, x4
|
||||
; CHECK-NEXT: orr w8, w9, w8
|
||||
; CHECK-NEXT: madd x15, x1, x4, x15
|
||||
; CHECK-NEXT: cset w17, ne
|
||||
; CHECK-NEXT: cmp xzr, x14
|
||||
; CHECK-NEXT: orr w12, w17, w12
|
||||
; CHECK-NEXT: cset w14, ne
|
||||
; CHECK-NEXT: adds x15, x16, x15
|
||||
; CHECK-NEXT: orr w12, w12, w14
|
||||
; CHECK-NEXT: cset w14, hs
|
||||
; CHECK-NEXT: orr w12, w12, w14
|
||||
; CHECK-NEXT: orr w8, w8, w11
|
||||
; CHECK-NEXT: mul x11, x0, x4
|
||||
; CHECK-NEXT: ldr x9, [sp]
|
||||
; CHECK-NEXT: fmov s0, w12
|
||||
; CHECK-NEXT: stp x11, x15, [x9]
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: mul x8, x2, x6
|
||||
; CHECK-NEXT: orr w9, w12, w9
|
||||
; CHECK-NEXT: mul x12, x0, x4
|
||||
; CHECK-NEXT: ccmp xzr, x13, #0, eq
|
||||
; CHECK-NEXT: umulh x13, x5, x0
|
||||
; CHECK-NEXT: ccmp xzr, x13, #0, eq
|
||||
; CHECK-NEXT: cset w13, ne
|
||||
; CHECK-NEXT: orr w11, w13, w11
|
||||
; CHECK-NEXT: fmov s0, w11
|
||||
; CHECK-NEXT: ldr x11, [sp]
|
||||
; CHECK-NEXT: mov v0.s[1], w9
|
||||
; CHECK-NEXT: mul x9, x2, x6
|
||||
; CHECK-NEXT: stp x12, x10, [x11]
|
||||
; CHECK-NEXT: shl v0.2s, v0.2s, #31
|
||||
; CHECK-NEXT: stp x8, x10, [x9, #16]
|
||||
; CHECK-NEXT: stp x9, x8, [x11, #16]
|
||||
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
|
||||
; CHECK-NEXT: ret
|
||||
%t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
|
||||
|
|
Loading…
Reference in New Issue