Extend `performANDCSELCombine` to `performANDORCSELCombine`

Differential Revision: https://reviews.llvm.org/D120422
Karl Meakin 2022-02-16 17:07:05 +00:00
parent 6467d1d275
commit 43a0016f3d
7 changed files with 273 additions and 129 deletions


@@ -14034,15 +14034,85 @@ static SDValue tryCombineToBSL(SDNode *N,
return SDValue();
}
// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
// convert to csel(ccmp(.., cc0)), depending on cc1:
// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
// =>
// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
//
// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
// =>
// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue CSel0 = N->getOperand(0);
SDValue CSel1 = N->getOperand(1);
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
CSel1.getOpcode() != AArch64ISD::CSEL)
return SDValue();
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
return SDValue();
if (!isNullConstant(CSel0.getOperand(0)) ||
!isOneConstant(CSel0.getOperand(1)) ||
!isNullConstant(CSel1.getOperand(0)) ||
!isOneConstant(CSel1.getOperand(1)))
return SDValue();
SDValue Cmp0 = CSel0.getOperand(3);
SDValue Cmp1 = CSel1.getOperand(3);
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
return SDValue();
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
Cmp0.getOpcode() == AArch64ISD::SUBS) {
std::swap(Cmp0, Cmp1);
std::swap(CC0, CC1);
}
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
return SDValue();
SDLoc DL(N);
SDValue CCmp;
if (N->getOpcode() == ISD::AND) {
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
} else {
AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
}
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
CCmp);
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
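
The comment above `performANDORCSELCombine` gives one rewrite rule per opcode, and the two cases pick opposite polarities: AND parks an NZCV immediate that fails cc1 (so a failed cc0 makes the whole expression false), while OR parks one that satisfies cc1 (so a passed cc0 makes it true). The following is a minimal standalone sketch (illustrative only, not LLVM code) that models CCMP's flag selection as a boolean choice and exhaustively checks both rules:

```cpp
#include <cassert>

// Model of CCMP in the comment's CSET notation: if `cond` holds on the
// incoming flags, the new compare produces the flags; otherwise they are
// forced to the immediate NZCV value.
static bool ccmp(bool cond, bool cmpSatisfiesCC1, bool nzcvSatisfiesCC1) {
  return cond ? cmpSatisfiesCC1 : nzcvSatisfiesCC1;
}

int main() {
  for (int c0 = 0; c0 <= 1; ++c0)
    for (int c1 = 0; c1 <= 1; ++c1) {
      bool cc0 = c0, cc1 = c1;
      // AND rule: (CCMP x1 y1 !cc1 cc0 cmp0) -- condition cc0, NZCV fails cc1.
      assert(ccmp(cc0, cc1, false) == (cc0 && cc1));
      // OR rule: (CCMP x1 y1 cc1 !cc0 cmp0) -- condition !cc0, NZCV meets cc1.
      assert(ccmp(!cc0, cc1, true) == (cc0 || cc1));
    }
}
```

The polarities in the function itself look flipped relative to these rules (InvCC0 for AND, CC0 for OR) because LowerSETCC materializes a setcc as csel(0, 1, cc) with the condition already inverted (so it can match CSINC), meaning the CC stored on each csel node is the negation of the cc in the CSET notation.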
@@ -14171,60 +14241,13 @@ static SDValue performSVEAndCombine(SDNode *N,
return SDValue();
}
// Given a tree of and(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
// convert to csel(ccmp(.., cc0)), depending on cc1.
static SDValue PerformANDCSELCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue CSel0 = N->getOperand(0);
SDValue CSel1 = N->getOperand(1);
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
CSel1.getOpcode() != AArch64ISD::CSEL)
return SDValue();
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
return SDValue();
if (!isNullConstant(CSel0.getOperand(0)) ||
!isOneConstant(CSel0.getOperand(1)) ||
!isNullConstant(CSel1.getOperand(0)) ||
!isOneConstant(CSel1.getOperand(1)))
return SDValue();
SDValue Cmp0 = CSel0.getOperand(3);
SDValue Cmp1 = CSel1.getOperand(3);
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
return SDValue();
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
Cmp0.getOpcode() == AArch64ISD::SUBS) {
std::swap(Cmp0, Cmp1);
std::swap(CC0, CC1);
}
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
return SDValue();
SDLoc DL(N);
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
CCmp);
}
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))


@@ -754,16 +754,12 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
@g = global i32 0
; Should not use ccmp if we have to compute the or expression in an integer
; register anyway because of other users.
define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
; CHECK-LABEL: select_noccmp2:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: cmp x0, #13
; CHECK-NEXT: cset w9, gt
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: ccmp x0, #13, #0, ge
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x0, xzr, x3, ne
; CHECK-NEXT: sbfx w8, w8, #0, #1
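
Despite the file comment, the updated CHECK lines show select_noccmp2 now using ccmp: the one-use checks in performANDORCSELCombine apply to the individual csel and compare nodes feeding the or, each of which has a single use here; only the combined boolean has the extra user. Roughly the source shape involved (a hypothetical C++ reconstruction from the test name and codegen; `g` and the exact types are assumptions):

```cpp
extern int g;  // stands in for the @g global the test stores through

long select_noccmp2(long v1, long r) {
  bool c = (v1 < 0) | (v1 > 13);  // now cmp + ccmp + cset instead of two csets + orr
  g = c ? -1 : 0;                 // extra user: the sign-extended flag stored to @g
  return c ? 0 : r;
}
```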
@@ -799,21 +795,17 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
; CHECK-LABEL: select_noccmp3:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: cmp w0, #13
; CHECK-NEXT: cset w9, gt
; CHECK-NEXT: ccmp w0, #13, #0, ge
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: cmp w0, #22
; CHECK-NEXT: cset w10, lt
; CHECK-NEXT: cmp w0, #44
; CHECK-NEXT: cset w11, gt
; CHECK-NEXT: mov w9, #44
; CHECK-NEXT: ccmp w0, w9, #0, ge
; CHECK-NEXT: cset w9, gt
; CHECK-NEXT: cmp w0, #99
; CHECK-NEXT: cset w12, eq
; CHECK-NEXT: cmp w0, #77
; CHECK-NEXT: cset w13, eq
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: orr w9, w10, w11
; CHECK-NEXT: and w8, w8, w9
; CHECK-NEXT: orr w9, w12, w13
; CHECK-NEXT: mov w9, #77
; CHECK-NEXT: ccmp w0, w9, #4, ne
; CHECK-NEXT: cset w9, eq
; CHECK-NEXT: tst w8, w9
; CHECK-NEXT: csel w0, w1, w2, ne
; CHECK-NEXT: ret


@@ -257,13 +257,12 @@ define dso_local i1 @test_setcc3() {
; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
; CHECK-NEXT: bl __eqtf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cset w19, eq
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: orr w0, w8, w19
; CHECK-NEXT: ccmp w19, #0, #4, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
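
Here the two booleans being merged come from libcalls rather than integer compares: fp128 has no native AArch64 compare, so an unordered-or-equal test lowers to __eqtf2 plus __unordtf2, and the patch folds their cset/orr combination into cmp + ccmp. A hypothetical C++ shape of the test (reconstructed; `lhs` and `rhs` match the globals loaded above, the rest is assumption):

```cpp
extern __float128 lhs, rhs;  // assumes the Clang/GCC __float128 extension

bool test_setcc3() {
  // fcmp ueq: true if unordered OR equal; each half becomes a libcall
  // whose i32 result is now threaded through ccmp instead of orr.
  return __builtin_isunordered(lhs, rhs) || lhs == rhs;
}
```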


@@ -0,0 +1,145 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
; Ensure chains of comparisons produce chains of `ccmp`
; (x0 < x1) && (x2 > x3)
define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
; CHECK-LABEL: cmp_and2:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: ccmp w2, w3, #0, lo
; CHECK-NEXT: cset w0, hi
; CHECK-NEXT: ret
%5 = icmp ult i32 %0, %1
%6 = icmp ugt i32 %2, %3
%7 = select i1 %5, i1 %6, i1 false
%8 = zext i1 %7 to i32
ret i32 %8
}
; (x0 < x1) && (x2 > x3) && (x4 != x5)
define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; CHECK-LABEL: cmp_and3:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: ccmp w2, w3, #0, lo
; CHECK-NEXT: ccmp w4, w5, #4, hi
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%7 = icmp ult i32 %0, %1
%8 = icmp ugt i32 %2, %3
%9 = select i1 %7, i1 %8, i1 false
%10 = icmp ne i32 %4, %5
%11 = select i1 %9, i1 %10, i1 false
%12 = zext i1 %11 to i32
ret i32 %12
}
; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7)
define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
; CHECK-LABEL: cmp_and4:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w2, w3
; CHECK-NEXT: ccmp w0, w1, #2, hi
; CHECK-NEXT: ccmp w4, w5, #4, lo
; CHECK-NEXT: ccmp w6, w7, #0, ne
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%9 = icmp ugt i32 %2, %3
%10 = icmp ult i32 %0, %1
%11 = select i1 %9, i1 %10, i1 false
%12 = icmp ne i32 %4, %5
%13 = select i1 %11, i1 %12, i1 false
%14 = icmp eq i32 %6, %7
%15 = select i1 %13, i1 %14, i1 false
%16 = zext i1 %15 to i32
ret i32 %16
}
; (x0 < x1) || (x2 != x3)
define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
; CHECK-LABEL: cmp_or2:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: ccmp w2, w3, #0, hs
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%5 = icmp ult i32 %0, %1
%6 = icmp ne i32 %2, %3
%7 = select i1 %5, i1 true, i1 %6
%8 = zext i1 %7 to i32
ret i32 %8
}
; (x0 < x1) || (x2 > x3) || (x4 != x5)
define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; CHECK-LABEL: cmp_or3:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: ccmp w2, w3, #2, hs
; CHECK-NEXT: ccmp w4, w5, #0, ls
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%7 = icmp ult i32 %0, %1
%8 = icmp ugt i32 %2, %3
%9 = select i1 %7, i1 true, i1 %8
%10 = icmp ne i32 %4, %5
%11 = select i1 %9, i1 true, i1 %10
%12 = zext i1 %11 to i32
ret i32 %12
}
; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
; CHECK-LABEL: cmp_or4:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: ccmp w2, w3, #2, hs
; CHECK-NEXT: ccmp w4, w5, #0, ls
; CHECK-NEXT: ccmp w6, w7, #4, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%9 = icmp ult i32 %0, %1
%10 = icmp ugt i32 %2, %3
%11 = select i1 %9, i1 true, i1 %10
%12 = icmp ne i32 %4, %5
%13 = select i1 %11, i1 true, i1 %12
%14 = icmp eq i32 %6, %7
%15 = select i1 %13, i1 true, i1 %14
%16 = zext i1 %15 to i32
ret i32 %16
}
; (x0 != 0) || (x1 != 0)
define i32 @true_or2(i32 %0, i32 %1) {
; CHECK-LABEL: true_or2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%3 = icmp ne i32 %0, 0
%4 = icmp ne i32 %1, 0
%5 = select i1 %3, i1 true, i1 %4
%6 = zext i1 %5 to i32
ret i32 %6
}
; (x0 != 0) || (x1 != 0) || (x2 != 0)
define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
; CHECK-LABEL: true_or3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: orr w8, w8, w2
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%4 = icmp ne i32 %0, 0
%5 = icmp ne i32 %1, 0
%6 = select i1 %4, i1 true, i1 %5
%7 = icmp ne i32 %2, 0
%8 = select i1 %6, i1 true, i1 %7
%9 = zext i1 %8 to i32
ret i32 %9
}
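
For orientation, the and/or chains above correspond to source of roughly this shape (hypothetical C++ equivalents, not part of the commit; unsigned types to match the ult/ugt predicates). Each short-circuit step folds into one ccmp, while the true_or cases stay as orr + cmp since (x | y) != 0 needs no conditional compare:

```cpp
// Hypothetical equivalents of cmp_and3 / cmp_or3; names are invented.
unsigned cmp_and3(unsigned a, unsigned b, unsigned c, unsigned d,
                  unsigned e, unsigned f) {
  return (a < b) && (c > d) && (e != f);  // cmp + ccmp + ccmp + cset
}

unsigned cmp_or3(unsigned a, unsigned b, unsigned c, unsigned d,
                 unsigned e, unsigned f) {
  return (a < b) || (c > d) || (e != f);  // cmp + ccmp + ccmp + cset
}
```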


@@ -18,10 +18,8 @@ define i1 @or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: or:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: cmp w2, w3
; CHECK-NEXT: cset w9, gt
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ccmp w2, w3, #0, ne
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%a = icmp eq i32 %x, %y
%b = icmp sgt i32 %z, %w
@@ -46,10 +44,8 @@ define i1 @or_not(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: or_not:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: cmp w2, w3
; CHECK-NEXT: cset w9, gt
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ccmp w2, w3, #0, eq
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%a = icmp eq i32 %x, %y
%b = icmp sgt i32 %z, %w
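
These two tests also illustrate the cc0 inversion in the OR rule: negating the first comparison (eq versus ne) only flips the predicate on the ccmp (ne versus eq) while the rest of the sequence is unchanged. In hypothetical C++ source terms (a reconstruction, not from the test file; `or` is a reserved token in C++, hence the underscore):

```cpp
bool or_(int x, int y, int z, int w) {
  return (x == y) || (z > w);  // cmp; ccmp ..., ne; cset gt
}

bool or_not(int x, int y, int z, int w) {
  return (x != y) || (z > w);  // cmp; ccmp ..., eq; cset gt
}
```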


@@ -4,24 +4,21 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; AARCH-LABEL: muloti_test:
; AARCH: // %bb.0: // %start
; AARCH-NEXT: umulh x8, x1, x2
; AARCH-NEXT: mul x9, x3, x0
; AARCH-NEXT: cmp xzr, x8
; AARCH-NEXT: umulh x10, x3, x0
; AARCH-NEXT: cset w8, ne
; AARCH-NEXT: mul x8, x3, x0
; AARCH-NEXT: umulh x9, x0, x2
; AARCH-NEXT: madd x8, x1, x2, x8
; AARCH-NEXT: umulh x10, x1, x2
; AARCH-NEXT: adds x8, x9, x8
; AARCH-NEXT: cset w9, hs
; AARCH-NEXT: cmp x1, #0
; AARCH-NEXT: ccmp x3, #0, #4, ne
; AARCH-NEXT: madd x9, x1, x2, x9
; AARCH-NEXT: cset w11, ne
; AARCH-NEXT: cmp xzr, x10
; AARCH-NEXT: umulh x10, x0, x2
; AARCH-NEXT: orr w8, w11, w8
; AARCH-NEXT: cset w11, ne
; AARCH-NEXT: mov x1, x8
; AARCH-NEXT: ccmp xzr, x10, #0, eq
; AARCH-NEXT: umulh x10, x3, x0
; AARCH-NEXT: mul x0, x0, x2
; AARCH-NEXT: adds x1, x10, x9
; AARCH-NEXT: orr w8, w8, w11
; AARCH-NEXT: cset w9, hs
; AARCH-NEXT: orr w2, w8, w9
; AARCH-NEXT: ccmp xzr, x10, #0, eq
; AARCH-NEXT: cset w10, ne
; AARCH-NEXT: orr w2, w10, w9
; AARCH-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
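
The overflow bit of a 128-bit multiply is computed from several flag results (high-half checks plus the carry out of the cross-product add); previously each was materialized with cset and combined with orr, and the patch now threads most of them through ccmp. A hypothetical C++ driver for the same intrinsic (illustrative only; names are invented):

```cpp
// Wraps the operation the test's @llvm.umul.with.overflow.i128 performs;
// __builtin_mul_overflow is the portable Clang/GCC way to emit it.
struct MulOti { unsigned __int128 value; bool overflow; };

MulOti muloti(unsigned __int128 l, unsigned __int128 r) {
  MulOti out;
  out.overflow = __builtin_mul_overflow(l, r, &out.value);
  return out;
}
```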


@@ -322,48 +322,40 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; CHECK-LABEL: umulo_v2i128:
; CHECK: // %bb.0:
; CHECK-NEXT: umulh x8, x3, x6
; CHECK-NEXT: mul x10, x7, x2
; CHECK-NEXT: cmp xzr, x8
; CHECK-NEXT: umulh x8, x7, x2
; CHECK-NEXT: cset w9, ne
; CHECK-NEXT: mul x8, x7, x2
; CHECK-NEXT: umulh x9, x2, x6
; CHECK-NEXT: madd x8, x3, x6, x8
; CHECK-NEXT: umulh x10, x3, x6
; CHECK-NEXT: adds x8, x9, x8
; CHECK-NEXT: umulh x11, x7, x2
; CHECK-NEXT: cset w9, hs
; CHECK-NEXT: cmp x3, #0
; CHECK-NEXT: ccmp x7, #0, #4, ne
; CHECK-NEXT: umulh x11, x2, x6
; CHECK-NEXT: madd x10, x3, x6, x10
; CHECK-NEXT: umulh x12, x1, x4
; CHECK-NEXT: cset w13, ne
; CHECK-NEXT: cmp xzr, x8
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: umulh x13, x1, x4
; CHECK-NEXT: ccmp xzr, x10, #0, eq
; CHECK-NEXT: mul x10, x5, x0
; CHECK-NEXT: madd x10, x1, x4, x10
; CHECK-NEXT: ccmp xzr, x11, #0, eq
; CHECK-NEXT: umulh x11, x0, x4
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: adds x10, x11, x10
; CHECK-NEXT: cset w11, hs
; CHECK-NEXT: cmp xzr, x12
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x5, #0, #4, ne
; CHECK-NEXT: mul x15, x5, x0
; CHECK-NEXT: umulh x14, x5, x0
; CHECK-NEXT: orr w9, w13, w9
; CHECK-NEXT: umulh x16, x0, x4
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: madd x15, x1, x4, x15
; CHECK-NEXT: cset w17, ne
; CHECK-NEXT: cmp xzr, x14
; CHECK-NEXT: orr w12, w17, w12
; CHECK-NEXT: cset w14, ne
; CHECK-NEXT: adds x15, x16, x15
; CHECK-NEXT: orr w12, w12, w14
; CHECK-NEXT: cset w14, hs
; CHECK-NEXT: orr w12, w12, w14
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: mul x11, x0, x4
; CHECK-NEXT: ldr x9, [sp]
; CHECK-NEXT: fmov s0, w12
; CHECK-NEXT: stp x11, x15, [x9]
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: mul x8, x2, x6
; CHECK-NEXT: orr w9, w12, w9
; CHECK-NEXT: mul x12, x0, x4
; CHECK-NEXT: ccmp xzr, x13, #0, eq
; CHECK-NEXT: umulh x13, x5, x0
; CHECK-NEXT: ccmp xzr, x13, #0, eq
; CHECK-NEXT: cset w13, ne
; CHECK-NEXT: orr w11, w13, w11
; CHECK-NEXT: fmov s0, w11
; CHECK-NEXT: ldr x11, [sp]
; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: mul x9, x2, x6
; CHECK-NEXT: stp x12, x10, [x11]
; CHECK-NEXT: shl v0.2s, v0.2s, #31
; CHECK-NEXT: stp x8, x10, [x9, #16]
; CHECK-NEXT: stp x9, x8, [x11, #16]
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-NEXT: ret
%t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)