[RISCV] Optimize (brcond (seteq (and X, 1 << C), 0))

If C > 10, this will require a constant to be materialized for the
And. To avoid this, we can shift X left by XLen-1-C bits to put the
tested bit in the MSB, then we can do a signed compare with 0 to
determine if the MSB is 0 or 1. Thanks to @reames for the suggestion.
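
As an illustrative sketch (not taken verbatim from the tests in this
patch; it assumes RV64, C = 20, X in a0, and made-up label names), the
branch on bit 20 goes from

  lui   a1, 256        # materialize 1 << 20
  and   a1, a0, a1
  beqz  a1, .LBB0_2    # taken when bit 20 of X is clear

to

  slli  a1, a0, 43     # 43 = 64 - 1 - 20; bit 20 becomes the sign bit
  bgez  a1, .LBB0_2    # taken when bit 20 of X is clear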

I've implemented this inside translateSetCCForBranch, which is
called when setcc+brcond or setcc+select is converted to br_cc or
select_cc during lowering. It doesn't make sense to do this for
general setcc since we lack a sgez instruction.
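
A rough sketch of the rewrite (paraphrasing the new code, not literal
DAG output), expressed as the (LHS, RHS, CC) triple used for br_cc and
select_cc:

  (and X, 1 << C), 0, seteq  -->  (shl X, XLen-1-C), 0, setge
  (and X, 1 << C), 0, setne  -->  (shl X, XLen-1-C), 0, setlt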

I've tested bits 10, 11, 31, 32, and 63, plus a couple of bits between
11 and 31 and between 32 and 63, for both i32 and i64 where applicable.
Select has some deficiencies where we receive (and (srl X, C), 1)
instead. This doesn't happen for br_cc due to the call to rebuildSetCC
in the generic DAGCombiner for brcond. I'll explore improving select in
a future patch.
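
For reference, the two shapes being contrasted (illustrative; the new
check only fires when the AND mask itself is a power of two that
doesn't fit ANDI):

  (seteq (and X, 1 << C), 0)       -- single-bit mask, rewritten by this patch
  (seteq (and (srl X, C), 1), 0)   -- mask is 1, fits ANDI, so the rewrite doesn't fire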

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D130203
Craig Topper 2022-07-20 18:11:19 -07:00
parent 7abbd6224b
commit 8983db15a3
4 changed files with 1848 additions and 472 deletions


@ -1370,6 +1370,23 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) {
      CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
      unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  // Convert X > -1 to X >= 0.
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
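
To make the shift amounts in the test updates concrete (a worked example
using the same arithmetic as the code above): for a test of bit 11 on RV64,
Mask = 1 << 11 = 0x800 is a power of two that does not fit ANDI's signed
12-bit immediate, Log2_64(Mask) = 11, so ShAmt = 64 - 1 - 11 = 52 and
SETEQ/SETNE against zero become SETGE/SETLT on (shl X, 52). That is the
pattern in the test diffs below, where lui/addiw/and/beqz (or bnez)
sequences become "slli a2, a1, 52" followed by bgez (or bltz).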

File diff suppressed because it is too large


@ -12458,10 +12458,10 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
; RV64ZVE32F-NEXT: bnez a2, .LBB97_26
; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
; RV64ZVE32F-NEXT: bnez a2, .LBB97_27
; RV64ZVE32F-NEXT: .LBB97_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
@ -12480,13 +12480,13 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB97_31
; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
; RV64ZVE32F-NEXT: .LBB97_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB97_32
; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
; RV64ZVE32F-NEXT: .LBB97_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB97_15
@ -12513,37 +12513,16 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
; RV64ZVE32F-NEXT: .LBB97_17: # %else29
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
; RV64ZVE32F-NEXT: and a3, a1, a3
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB97_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lb a3, 0(a3)
; RV64ZVE32F-NEXT: vmv.s.x v10, a3
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11
; RV64ZVE32F-NEXT: .LBB97_19: # %else32
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB97_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12
; RV64ZVE32F-NEXT: .LBB97_21: # %else35
; RV64ZVE32F-NEXT: lui a2, 2
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB97_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
; RV64ZVE32F-NEXT: bltz a2, .LBB97_31
; RV64ZVE32F-NEXT: # %bb.18: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bltz a2, .LBB97_32
; RV64ZVE32F-NEXT: .LBB97_19: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB97_21
; RV64ZVE32F-NEXT: .LBB97_20: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@ -12552,24 +12531,23 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 13
; RV64ZVE32F-NEXT: .LBB97_23: # %else38
; RV64ZVE32F-NEXT: .LBB97_21: # %else38
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 4
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB97_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40
; RV64ZVE32F-NEXT: bgez a2, .LBB97_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
; RV64ZVE32F-NEXT: .LBB97_25: # %else41
; RV64ZVE32F-NEXT: .LBB97_23: # %else41
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB97_27
; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43
; RV64ZVE32F-NEXT: beqz a1, .LBB97_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@ -12578,10 +12556,10 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
; RV64ZVE32F-NEXT: .LBB97_27: # %else44
; RV64ZVE32F-NEXT: .LBB97_25: # %else44
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load7
; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@ -12592,7 +12570,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load10
; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -12604,7 +12582,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB97_9
; RV64ZVE32F-NEXT: j .LBB97_10
; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load16
; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@ -12613,7 +12591,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB97_12
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load19
; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@ -12624,7 +12602,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load22
; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -12636,6 +12614,29 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
; RV64ZVE32F-NEXT: j .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bgez a2, .LBB97_19
; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bltz a2, .LBB97_20
; RV64ZVE32F-NEXT: j .LBB97_21
%ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
ret <16 x i8> %v
@ -12722,10 +12723,10 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB98_60
; RV64ZVE32F-NEXT: bnez a2, .LBB98_50
; RV64ZVE32F-NEXT: # %bb.7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB98_61
; RV64ZVE32F-NEXT: bnez a2, .LBB98_51
; RV64ZVE32F-NEXT: .LBB98_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
@ -12746,13 +12747,13 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_62
; RV64ZVE32F-NEXT: bnez a2, .LBB98_52
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB98_63
; RV64ZVE32F-NEXT: bnez a2, .LBB98_53
; RV64ZVE32F-NEXT: .LBB98_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB98_64
; RV64ZVE32F-NEXT: bnez a2, .LBB98_54
; RV64ZVE32F-NEXT: .LBB98_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB98_15
@ -12783,27 +12784,25 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
; RV64ZVE32F-NEXT: .LBB98_17: # %else29
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
; RV64ZVE32F-NEXT: and a3, a1, a3
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB98_19
; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v13
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lb a3, 0(a3)
; RV64ZVE32F-NEXT: li a4, 32
; RV64ZVE32F-NEXT: vsetvli zero, a4, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a3
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11
; RV64ZVE32F-NEXT: .LBB98_19: # %else32
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB98_21
; RV64ZVE32F-NEXT: bgez a2, .LBB98_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -12814,9 +12813,8 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12
; RV64ZVE32F-NEXT: .LBB98_21: # %else35
; RV64ZVE32F-NEXT: lui a2, 2
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_23
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB98_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
@ -12830,53 +12828,19 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13
; RV64ZVE32F-NEXT: .LBB98_23: # %else38
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 4
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14
; RV64ZVE32F-NEXT: .LBB98_25: # %else41
; RV64ZVE32F-NEXT: lui a2, 8
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_27
; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15
; RV64ZVE32F-NEXT: .LBB98_27: # %else44
; RV64ZVE32F-NEXT: lui a2, 16
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_29
; RV64ZVE32F-NEXT: # %bb.28: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
; RV64ZVE32F-NEXT: .LBB98_29: # %else47
; RV64ZVE32F-NEXT: lui a2, 32
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_31
; RV64ZVE32F-NEXT: # %bb.30: # %cond.load49
; RV64ZVE32F-NEXT: bltz a2, .LBB98_55
; RV64ZVE32F-NEXT: # %bb.24: # %else41
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB98_56
; RV64ZVE32F-NEXT: .LBB98_25: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB98_57
; RV64ZVE32F-NEXT: .LBB98_26: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@ -12887,13 +12851,12 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
; RV64ZVE32F-NEXT: .LBB98_31: # %else50
; RV64ZVE32F-NEXT: .LBB98_28: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 64
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_33
; RV64ZVE32F-NEXT: # %bb.32: # %cond.load52
; RV64ZVE32F-NEXT: bgez a2, .LBB98_30
; RV64ZVE32F-NEXT: # %bb.29: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@ -12902,42 +12865,18 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
; RV64ZVE32F-NEXT: .LBB98_33: # %else53
; RV64ZVE32F-NEXT: .LBB98_30: # %else53
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 128
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB98_35
; RV64ZVE32F-NEXT: # %bb.34: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19
; RV64ZVE32F-NEXT: .LBB98_35: # %else56
; RV64ZVE32F-NEXT: lui a2, 256
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_37
; RV64ZVE32F-NEXT: # %bb.36: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20
; RV64ZVE32F-NEXT: .LBB98_37: # %else59
; RV64ZVE32F-NEXT: lui a2, 512
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_39
; RV64ZVE32F-NEXT: # %bb.38: # %cond.load61
; RV64ZVE32F-NEXT: bltz a2, .LBB98_58
; RV64ZVE32F-NEXT: # %bb.31: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB98_59
; RV64ZVE32F-NEXT: .LBB98_32: # %else59
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: bgez a2, .LBB98_34
; RV64ZVE32F-NEXT: .LBB98_33: # %cond.load61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@ -12948,57 +12887,23 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 21
; RV64ZVE32F-NEXT: .LBB98_39: # %else62
; RV64ZVE32F-NEXT: .LBB98_34: # %else62
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1024
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
; RV64ZVE32F-NEXT: .LBB98_41: # %else65
; RV64ZVE32F-NEXT: lui a2, 2048
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_43
; RV64ZVE32F-NEXT: # %bb.42: # %cond.load67
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
; RV64ZVE32F-NEXT: .LBB98_43: # %else68
; RV64ZVE32F-NEXT: lui a2, 4096
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_45
; RV64ZVE32F-NEXT: # %bb.44: # %cond.load70
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
; RV64ZVE32F-NEXT: .LBB98_45: # %else71
; RV64ZVE32F-NEXT: lui a2, 8192
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_47
; RV64ZVE32F-NEXT: # %bb.46: # %cond.load73
; RV64ZVE32F-NEXT: bltz a2, .LBB98_60
; RV64ZVE32F-NEXT: # %bb.35: # %else65
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB98_61
; RV64ZVE32F-NEXT: .LBB98_36: # %else68
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB98_62
; RV64ZVE32F-NEXT: .LBB98_37: # %else71
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@ -13009,13 +12914,12 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
; RV64ZVE32F-NEXT: .LBB98_47: # %else74
; RV64ZVE32F-NEXT: .LBB98_39: # %else74
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 16384
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_49
; RV64ZVE32F-NEXT: # %bb.48: # %cond.load76
; RV64ZVE32F-NEXT: bgez a2, .LBB98_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.load76
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@ -13024,42 +12928,18 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
; RV64ZVE32F-NEXT: .LBB98_49: # %else77
; RV64ZVE32F-NEXT: .LBB98_41: # %else77
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 32768
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB98_51
; RV64ZVE32F-NEXT: # %bb.50: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
; RV64ZVE32F-NEXT: .LBB98_51: # %else80
; RV64ZVE32F-NEXT: lui a2, 65536
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_53
; RV64ZVE32F-NEXT: # %bb.52: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
; RV64ZVE32F-NEXT: .LBB98_53: # %else83
; RV64ZVE32F-NEXT: lui a2, 131072
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_55
; RV64ZVE32F-NEXT: # %bb.54: # %cond.load85
; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
; RV64ZVE32F-NEXT: # %bb.42: # %else80
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
; RV64ZVE32F-NEXT: .LBB98_43: # %else83
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB98_45
; RV64ZVE32F-NEXT: .LBB98_44: # %cond.load85
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@ -13070,13 +12950,12 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
; RV64ZVE32F-NEXT: .LBB98_55: # %else86
; RV64ZVE32F-NEXT: .LBB98_45: # %else86
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 262144
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_57
; RV64ZVE32F-NEXT: # %bb.56: # %cond.load88
; RV64ZVE32F-NEXT: bgez a2, .LBB98_47
; RV64ZVE32F-NEXT: # %bb.46: # %cond.load88
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@ -13085,11 +12964,11 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
; RV64ZVE32F-NEXT: .LBB98_57: # %else89
; RV64ZVE32F-NEXT: .LBB98_47: # %else89
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB98_59
; RV64ZVE32F-NEXT: # %bb.58: # %cond.load91
; RV64ZVE32F-NEXT: beqz a1, .LBB98_49
; RV64ZVE32F-NEXT: # %bb.48: # %cond.load91
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@ -13100,10 +12979,10 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
; RV64ZVE32F-NEXT: .LBB98_59: # %else92
; RV64ZVE32F-NEXT: .LBB98_49: # %else92
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load7
; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@ -13116,7 +12995,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load10
; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -13129,7 +13008,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB98_9
; RV64ZVE32F-NEXT: j .LBB98_10
; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load16
; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@ -13140,7 +13019,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load19
; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@ -13153,7 +13032,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load22
; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -13166,6 +13045,132 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
; RV64ZVE32F-NEXT: j .LBB98_15
; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
; RV64ZVE32F-NEXT: j .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: bltz a2, .LBB98_33
; RV64ZVE32F-NEXT: j .LBB98_34
; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load67
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load70
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
; RV64ZVE32F-NEXT: j .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB98_43
; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB98_44
; RV64ZVE32F-NEXT: j .LBB98_45
%ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v


@ -10842,10 +10842,10 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
; RV64ZVE32F-NEXT: .LBB91_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
@ -10863,13 +10863,13 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB91_31
; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
; RV64ZVE32F-NEXT: .LBB91_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB91_32
; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
; RV64ZVE32F-NEXT: .LBB91_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
@ -10894,34 +10894,16 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB91_17: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
; RV64ZVE32F-NEXT: and a3, a1, a3
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB91_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
; RV64ZVE32F-NEXT: vse8.v v10, (a3)
; RV64ZVE32F-NEXT: .LBB91_19: # %else22
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB91_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_21: # %else24
; RV64ZVE32F-NEXT: lui a2, 2
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB91_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
; RV64ZVE32F-NEXT: # %bb.18: # %else22
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
; RV64ZVE32F-NEXT: .LBB91_19: # %else24
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB91_21
; RV64ZVE32F-NEXT: .LBB91_20: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@ -10929,23 +10911,22 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_23: # %else26
; RV64ZVE32F-NEXT: .LBB91_21: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 4
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB91_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
; RV64ZVE32F-NEXT: bgez a2, .LBB91_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_25: # %else28
; RV64ZVE32F-NEXT: .LBB91_23: # %else28
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB91_27
; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
; RV64ZVE32F-NEXT: beqz a1, .LBB91_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
@ -10953,9 +10934,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB91_27: # %else30
; RV64ZVE32F-NEXT: .LBB91_25: # %else30
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store5
; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@ -10965,7 +10946,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store7
; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -10975,7 +10956,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB91_9
; RV64ZVE32F-NEXT: j .LBB91_10
; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store11
; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
@ -10983,7 +10964,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store13
; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@ -10993,7 +10974,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store15
; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -11003,6 +10984,26 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
; RV64ZVE32F-NEXT: j .LBB91_15
; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bgez a2, .LBB91_19
; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bltz a2, .LBB91_20
; RV64ZVE32F-NEXT: j .LBB91_21
%ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs
call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %val, <16 x i8*> %ptrs, i32 1, <16 x i1> %m)
ret void
@ -11075,10 +11076,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB92_60
; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB92_61
; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
; RV64ZVE32F-NEXT: .LBB92_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
@ -11096,13 +11097,13 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_62
; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB92_63
; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
; RV64ZVE32F-NEXT: .LBB92_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB92_64
; RV64ZVE32F-NEXT: bnez a2, .LBB92_54
; RV64ZVE32F-NEXT: .LBB92_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
@ -11127,24 +11128,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_17: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
; RV64ZVE32F-NEXT: and a3, a1, a3
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB92_19
; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v13
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11
; RV64ZVE32F-NEXT: vse8.v v14, (a3)
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_19: # %else22
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB92_21
; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -11152,9 +11151,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 12
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_21: # %else24
; RV64ZVE32F-NEXT: lui a2, 2
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_23
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
@ -11165,44 +11163,19 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_23: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 4
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_25
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_25: # %else28
; RV64ZVE32F-NEXT: lui a2, 8
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_27
; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_27: # %else30
; RV64ZVE32F-NEXT: lui a2, 16
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_29
; RV64ZVE32F-NEXT: # %bb.28: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_29: # %else32
; RV64ZVE32F-NEXT: lui a2, 32
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_31
; RV64ZVE32F-NEXT: # %bb.30: # %cond.store33
; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
; RV64ZVE32F-NEXT: # %bb.24: # %else28
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
; RV64ZVE32F-NEXT: .LBB92_25: # %else30
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
; RV64ZVE32F-NEXT: .LBB92_26: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@ -11210,48 +11183,29 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_31: # %else34
; RV64ZVE32F-NEXT: .LBB92_28: # %else34
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 64
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_33
; RV64ZVE32F-NEXT: # %bb.32: # %cond.store35
; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
; RV64ZVE32F-NEXT: # %bb.29: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_33: # %else36
; RV64ZVE32F-NEXT: .LBB92_30: # %else36
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 128
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB92_35
; RV64ZVE32F-NEXT: # %bb.34: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_35: # %else38
; RV64ZVE32F-NEXT: lui a2, 256
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_37
; RV64ZVE32F-NEXT: # %bb.36: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_37: # %else40
; RV64ZVE32F-NEXT: lui a2, 512
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_39
; RV64ZVE32F-NEXT: # %bb.38: # %cond.store41
; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
; RV64ZVE32F-NEXT: # %bb.31: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
; RV64ZVE32F-NEXT: .LBB92_32: # %else40
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
; RV64ZVE32F-NEXT: .LBB92_33: # %cond.store41
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@ -11259,48 +11213,23 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 21
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_39: # %else42
; RV64ZVE32F-NEXT: .LBB92_34: # %else42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1024
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_41: # %else44
; RV64ZVE32F-NEXT: lui a2, 2048
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_43
; RV64ZVE32F-NEXT: # %bb.42: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_43: # %else46
; RV64ZVE32F-NEXT: lui a2, 4096
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_45
; RV64ZVE32F-NEXT: # %bb.44: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_45: # %else48
; RV64ZVE32F-NEXT: lui a2, 8192
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_47
; RV64ZVE32F-NEXT: # %bb.46: # %cond.store49
; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
; RV64ZVE32F-NEXT: # %bb.35: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
; RV64ZVE32F-NEXT: .LBB92_36: # %else46
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
; RV64ZVE32F-NEXT: .LBB92_37: # %else48
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@ -11308,48 +11237,29 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_47: # %else50
; RV64ZVE32F-NEXT: .LBB92_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 16384
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_49
; RV64ZVE32F-NEXT: # %bb.48: # %cond.store51
; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_49: # %else52
; RV64ZVE32F-NEXT: .LBB92_41: # %else52
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 32768
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB92_51
; RV64ZVE32F-NEXT: # %bb.50: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_51: # %else54
; RV64ZVE32F-NEXT: lui a2, 65536
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_53
; RV64ZVE32F-NEXT: # %bb.52: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_53: # %else56
; RV64ZVE32F-NEXT: lui a2, 131072
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_55
; RV64ZVE32F-NEXT: # %bb.54: # %cond.store57
; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
; RV64ZVE32F-NEXT: # %bb.42: # %else54
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
; RV64ZVE32F-NEXT: .LBB92_43: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB92_45
; RV64ZVE32F-NEXT: .LBB92_44: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@ -11357,23 +11267,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_55: # %else58
; RV64ZVE32F-NEXT: .LBB92_45: # %else58
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 262144
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_57
; RV64ZVE32F-NEXT: # %bb.56: # %cond.store59
; RV64ZVE32F-NEXT: bgez a2, .LBB92_47
; RV64ZVE32F-NEXT: # %bb.46: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_57: # %else60
; RV64ZVE32F-NEXT: .LBB92_47: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB92_59
; RV64ZVE32F-NEXT: # %bb.58: # %cond.store61
; RV64ZVE32F-NEXT: beqz a1, .LBB92_49
; RV64ZVE32F-NEXT: # %bb.48: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@ -11381,9 +11290,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB92_59: # %else62
; RV64ZVE32F-NEXT: .LBB92_49: # %else62
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store5
; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@ -11393,7 +11302,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store7
; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -11403,7 +11312,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB92_9
; RV64ZVE32F-NEXT: j .LBB92_10
; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store11
; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
@ -11411,7 +11320,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store13
; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@ -11421,7 +11330,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store15
; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -11431,6 +11340,102 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
; RV64ZVE32F-NEXT: j .LBB92_15
; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
; RV64ZVE32F-NEXT: j .LBB92_28
; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: bltz a2, .LBB92_33
; RV64ZVE32F-NEXT: j .LBB92_34
; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
; RV64ZVE32F-NEXT: j .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB92_43
; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB92_44
; RV64ZVE32F-NEXT: j .LBB92_45
%ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> %val, <32 x i8*> %ptrs, i32 1, <32 x i1> %m)
ret void