[RISCV] Optimize (brcond (seteq (and X, 1 << C), 0))
If C > 10, this will require a constant to be materialized for the And.
To avoid this, we can shift X left by XLen-1-C bits to put the tested
bit in the MSB, then we can do a signed compare with 0 to determine if
the MSB is 0 or 1. Thanks to @reames for the suggestion.

I've implemented this inside of translateSetCCForBranch, which is called
when setcc+brcond or setcc+select is converted to br_cc or select_cc
during lowering. It doesn't make sense to do this for a general setcc
since we lack a sgez instruction.

I've tested bit 10, 11, 31, 32, 63 and a couple of bits between 11 and 31
and between 32 and 63 for both i32 and i64 where applicable.

Select has some deficiencies where we receive (and (srl X, C), 1)
instead. This doesn't happen for br_cc due to the call to rebuildSetCC
in the generic DAGCombiner for brcond. I'll explore improving select in
a future patch.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D130203
This commit is contained in:
parent 7abbd6224b
commit 8983db15a3
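To illustrate the effect on generated code, here is a simplified before/after sketch for a test of bit 11 of a mask value on RV64, adapted from the RV64ZVE32F check lines updated below (register names and branch labels are illustrative only, not a literal excerpt):

    # Before: 1 << 11 = 0x800 does not fit in ANDI's 12-bit signed immediate,
    # so the mask has to be materialized first.
    lui    a2, 1
    addiw  a3, a2, -2048
    and    a3, a1, a3
    beqz   a3, .LBB97_19

    # After: shift bit 11 into the MSB (shift amount 64 - 1 - 11 = 52) and
    # branch on the sign bit instead.
    slli   a2, a1, 52
    bgez   a2, .LBB97_19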
@@ -1370,6 +1370,23 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
 // with 1/-1.
 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                     ISD::CondCode &CC, SelectionDAG &DAG) {
+  // If this is a single bit test that can't be handled by ANDI, shift the
+  // bit to be tested to the MSB and perform a signed compare with 0.
+  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
+      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
+      isa<ConstantSDNode>(LHS.getOperand(1))) {
+    uint64_t Mask = LHS.getConstantOperandVal(1);
+    if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) {
+      CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
+      unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
+      LHS = LHS.getOperand(0);
+      if (ShAmt != 0)
+        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
+                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
+      return;
+    }
+  }
+
   // Convert X > -1 to X >= 0.
   if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
     RHS = DAG.getConstant(0, DL, RHS.getValueType());
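Concretely, for a single bit test of bit 11 with XLen = 64 (a sketch in the node notation used in the title; the values are chosen for illustration):

    (brcond (seteq (and X, 2048), 0), BB)
      -> CC becomes SETGE and ShAmt = 64 - 1 - 11 = 52
      -> branch on (setge (shl X, 52), 0)

which selects to a single slli followed by bgez, instead of materializing the 2048 mask for an AND.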
@ -12458,10 +12458,10 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 8
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_26
|
||||
; RV64ZVE32F-NEXT: # %bb.7: # %else8
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_27
|
||||
; RV64ZVE32F-NEXT: .LBB97_8: # %else11
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
|
||||
|
@ -12480,13 +12480,13 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 64
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
|
||||
; RV64ZVE32F-NEXT: # %bb.11: # %else17
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_31
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
|
||||
; RV64ZVE32F-NEXT: .LBB97_12: # %else20
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_32
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
|
||||
; RV64ZVE32F-NEXT: .LBB97_13: # %else23
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_15
|
||||
|
@ -12513,37 +12513,16 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
|
||||
; RV64ZVE32F-NEXT: .LBB97_17: # %else29
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 1
|
||||
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
|
||||
; RV64ZVE32F-NEXT: and a3, a1, a3
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 52
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
|
||||
; RV64ZVE32F-NEXT: beqz a3, .LBB97_19
|
||||
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
|
||||
; RV64ZVE32F-NEXT: add a3, a0, a3
|
||||
; RV64ZVE32F-NEXT: lb a3, 0(a3)
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v10, a3
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11
|
||||
; RV64ZVE32F-NEXT: .LBB97_19: # %else32
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_21
|
||||
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12
|
||||
; RV64ZVE32F-NEXT: .LBB97_21: # %else35
|
||||
; RV64ZVE32F-NEXT: lui a2, 2
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_23
|
||||
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB97_31
|
||||
; RV64ZVE32F-NEXT: # %bb.18: # %else32
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 51
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB97_32
|
||||
; RV64ZVE32F-NEXT: .LBB97_19: # %else35
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 50
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB97_21
|
||||
; RV64ZVE32F-NEXT: .LBB97_20: # %cond.load37
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
|
@ -12552,24 +12531,23 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 13
|
||||
; RV64ZVE32F-NEXT: .LBB97_23: # %else38
|
||||
; RV64ZVE32F-NEXT: .LBB97_21: # %else38
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 4
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 49
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_25
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB97_23
|
||||
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load40
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
|
||||
; RV64ZVE32F-NEXT: .LBB97_25: # %else41
|
||||
; RV64ZVE32F-NEXT: .LBB97_23: # %else41
|
||||
; RV64ZVE32F-NEXT: lui a2, 1048568
|
||||
; RV64ZVE32F-NEXT: and a1, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a1, .LBB97_27
|
||||
; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43
|
||||
; RV64ZVE32F-NEXT: beqz a1, .LBB97_25
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load43
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
|
||||
|
@ -12578,10 +12556,10 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
|
||||
; RV64ZVE32F-NEXT: .LBB97_27: # %else44
|
||||
; RV64ZVE32F-NEXT: .LBB97_25: # %else44
|
||||
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
|
||||
; RV64ZVE32F-NEXT: ret
|
||||
; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load7
|
||||
; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
|
||||
|
@ -12592,7 +12570,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
|
||||
; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load10
|
||||
; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load10
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -12604,7 +12582,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_9
|
||||
; RV64ZVE32F-NEXT: j .LBB97_10
|
||||
; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load16
|
||||
; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load16
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
|
@ -12613,7 +12591,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_12
|
||||
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load19
|
||||
; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load19
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
|
@ -12624,7 +12602,7 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
|
||||
; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load22
|
||||
; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load22
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -12636,6 +12614,29 @@ define <16 x i8> @mgather_baseidx_v16i8(i8* %base, <16 x i8> %idxs, <16 x i1> %m
|
|||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
|
||||
; RV64ZVE32F-NEXT: j .LBB97_15
|
||||
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 51
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB97_19
|
||||
; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 50
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB97_20
|
||||
; RV64ZVE32F-NEXT: j .LBB97_21
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs
|
||||
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
|
||||
ret <16 x i8> %v
|
||||
|
@ -12722,10 +12723,10 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 8
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_60
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_50
|
||||
; RV64ZVE32F-NEXT: # %bb.7: # %else8
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_61
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_51
|
||||
; RV64ZVE32F-NEXT: .LBB98_8: # %else11
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
|
||||
|
@ -12746,13 +12747,13 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 64
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_62
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_52
|
||||
; RV64ZVE32F-NEXT: # %bb.11: # %else17
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_63
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_53
|
||||
; RV64ZVE32F-NEXT: .LBB98_12: # %else20
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_64
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_54
|
||||
; RV64ZVE32F-NEXT: .LBB98_13: # %else23
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_15
|
||||
|
@ -12783,27 +12784,25 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
|
||||
; RV64ZVE32F-NEXT: .LBB98_17: # %else29
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 1
|
||||
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
|
||||
; RV64ZVE32F-NEXT: and a3, a1, a3
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 52
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
|
||||
; RV64ZVE32F-NEXT: beqz a3, .LBB98_19
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
|
||||
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a3, v13
|
||||
; RV64ZVE32F-NEXT: add a3, a0, a3
|
||||
; RV64ZVE32F-NEXT: lb a3, 0(a3)
|
||||
; RV64ZVE32F-NEXT: li a4, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a4, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a3
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11
|
||||
; RV64ZVE32F-NEXT: .LBB98_19: # %else32
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 51
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_21
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_21
|
||||
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -12814,9 +12813,8 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12
|
||||
; RV64ZVE32F-NEXT: .LBB98_21: # %else35
|
||||
; RV64ZVE32F-NEXT: lui a2, 2
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_23
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 50
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_23
|
||||
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
|
||||
|
@ -12830,53 +12828,19 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13
|
||||
; RV64ZVE32F-NEXT: .LBB98_23: # %else38
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 4
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 49
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_25
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14
|
||||
; RV64ZVE32F-NEXT: .LBB98_25: # %else41
|
||||
; RV64ZVE32F-NEXT: lui a2, 8
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_27
|
||||
; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15
|
||||
; RV64ZVE32F-NEXT: .LBB98_27: # %else44
|
||||
; RV64ZVE32F-NEXT: lui a2, 16
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_29
|
||||
; RV64ZVE32F-NEXT: # %bb.28: # %cond.load46
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
|
||||
; RV64ZVE32F-NEXT: .LBB98_29: # %else47
|
||||
; RV64ZVE32F-NEXT: lui a2, 32
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_31
|
||||
; RV64ZVE32F-NEXT: # %bb.30: # %cond.load49
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_55
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %else41
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 48
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_56
|
||||
; RV64ZVE32F-NEXT: .LBB98_25: # %else44
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 47
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_57
|
||||
; RV64ZVE32F-NEXT: .LBB98_26: # %else47
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 46
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_28
|
||||
; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
|
@ -12887,13 +12851,12 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
|
||||
; RV64ZVE32F-NEXT: .LBB98_31: # %else50
|
||||
; RV64ZVE32F-NEXT: .LBB98_28: # %else50
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 64
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 45
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_33
|
||||
; RV64ZVE32F-NEXT: # %bb.32: # %cond.load52
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_30
|
||||
; RV64ZVE32F-NEXT: # %bb.29: # %cond.load52
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
|
@ -12902,42 +12865,18 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
|
||||
; RV64ZVE32F-NEXT: .LBB98_33: # %else53
|
||||
; RV64ZVE32F-NEXT: .LBB98_30: # %else53
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 128
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 44
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_35
|
||||
; RV64ZVE32F-NEXT: # %bb.34: # %cond.load55
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19
|
||||
; RV64ZVE32F-NEXT: .LBB98_35: # %else56
|
||||
; RV64ZVE32F-NEXT: lui a2, 256
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_37
|
||||
; RV64ZVE32F-NEXT: # %bb.36: # %cond.load58
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20
|
||||
; RV64ZVE32F-NEXT: .LBB98_37: # %else59
|
||||
; RV64ZVE32F-NEXT: lui a2, 512
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_39
|
||||
; RV64ZVE32F-NEXT: # %bb.38: # %cond.load61
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_58
|
||||
; RV64ZVE32F-NEXT: # %bb.31: # %else56
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 43
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_59
|
||||
; RV64ZVE32F-NEXT: .LBB98_32: # %else59
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 42
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_34
|
||||
; RV64ZVE32F-NEXT: .LBB98_33: # %cond.load61
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
|
@ -12948,57 +12887,23 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 21
|
||||
; RV64ZVE32F-NEXT: .LBB98_39: # %else62
|
||||
; RV64ZVE32F-NEXT: .LBB98_34: # %else62
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 1024
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 41
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_41
|
||||
; RV64ZVE32F-NEXT: # %bb.40: # %cond.load64
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
|
||||
; RV64ZVE32F-NEXT: .LBB98_41: # %else65
|
||||
; RV64ZVE32F-NEXT: lui a2, 2048
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_43
|
||||
; RV64ZVE32F-NEXT: # %bb.42: # %cond.load67
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
|
||||
; RV64ZVE32F-NEXT: .LBB98_43: # %else68
|
||||
; RV64ZVE32F-NEXT: lui a2, 4096
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_45
|
||||
; RV64ZVE32F-NEXT: # %bb.44: # %cond.load70
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
|
||||
; RV64ZVE32F-NEXT: .LBB98_45: # %else71
|
||||
; RV64ZVE32F-NEXT: lui a2, 8192
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_47
|
||||
; RV64ZVE32F-NEXT: # %bb.46: # %cond.load73
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_60
|
||||
; RV64ZVE32F-NEXT: # %bb.35: # %else65
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 40
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_61
|
||||
; RV64ZVE32F-NEXT: .LBB98_36: # %else68
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 39
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_62
|
||||
; RV64ZVE32F-NEXT: .LBB98_37: # %else71
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 38
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_39
|
||||
; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
|
@ -13009,13 +12914,12 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
|
||||
; RV64ZVE32F-NEXT: .LBB98_47: # %else74
|
||||
; RV64ZVE32F-NEXT: .LBB98_39: # %else74
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 16384
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 37
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_49
|
||||
; RV64ZVE32F-NEXT: # %bb.48: # %cond.load76
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_41
|
||||
; RV64ZVE32F-NEXT: # %bb.40: # %cond.load76
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
|
@ -13024,42 +12928,18 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
|
||||
; RV64ZVE32F-NEXT: .LBB98_49: # %else77
|
||||
; RV64ZVE32F-NEXT: .LBB98_41: # %else77
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 32768
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 36
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_51
|
||||
; RV64ZVE32F-NEXT: # %bb.50: # %cond.load79
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
|
||||
; RV64ZVE32F-NEXT: .LBB98_51: # %else80
|
||||
; RV64ZVE32F-NEXT: lui a2, 65536
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_53
|
||||
; RV64ZVE32F-NEXT: # %bb.52: # %cond.load82
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
|
||||
; RV64ZVE32F-NEXT: .LBB98_53: # %else83
|
||||
; RV64ZVE32F-NEXT: lui a2, 131072
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_55
|
||||
; RV64ZVE32F-NEXT: # %bb.54: # %cond.load85
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
|
||||
; RV64ZVE32F-NEXT: # %bb.42: # %else80
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 35
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
|
||||
; RV64ZVE32F-NEXT: .LBB98_43: # %else83
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 34
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_45
|
||||
; RV64ZVE32F-NEXT: .LBB98_44: # %cond.load85
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
|
@ -13070,13 +12950,12 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
|
||||
; RV64ZVE32F-NEXT: .LBB98_55: # %else86
|
||||
; RV64ZVE32F-NEXT: .LBB98_45: # %else86
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 262144
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 33
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_57
|
||||
; RV64ZVE32F-NEXT: # %bb.56: # %cond.load88
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_47
|
||||
; RV64ZVE32F-NEXT: # %bb.46: # %cond.load88
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
|
@ -13085,11 +12964,11 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
|
||||
; RV64ZVE32F-NEXT: .LBB98_57: # %else89
|
||||
; RV64ZVE32F-NEXT: .LBB98_47: # %else89
|
||||
; RV64ZVE32F-NEXT: lui a2, 524288
|
||||
; RV64ZVE32F-NEXT: and a1, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a1, .LBB98_59
|
||||
; RV64ZVE32F-NEXT: # %bb.58: # %cond.load91
|
||||
; RV64ZVE32F-NEXT: beqz a1, .LBB98_49
|
||||
; RV64ZVE32F-NEXT: # %bb.48: # %cond.load91
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
|
||||
|
@ -13100,10 +12979,10 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
|
||||
; RV64ZVE32F-NEXT: .LBB98_59: # %else92
|
||||
; RV64ZVE32F-NEXT: .LBB98_49: # %else92
|
||||
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
|
||||
; RV64ZVE32F-NEXT: ret
|
||||
; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load7
|
||||
; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
|
@ -13116,7 +12995,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
|
||||
; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load10
|
||||
; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load10
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -13129,7 +13008,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_9
|
||||
; RV64ZVE32F-NEXT: j .LBB98_10
|
||||
; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load16
|
||||
; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load16
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
|
@ -13140,7 +13019,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
|
||||
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load19
|
||||
; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load19
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
|
@ -13153,7 +13032,7 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
|
||||
; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load22
|
||||
; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load22
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -13166,6 +13045,132 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
|
|||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
|
||||
; RV64ZVE32F-NEXT: j .LBB98_15
|
||||
; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load40
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 48
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
|
||||
; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load43
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 47
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
|
||||
; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load46
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 46
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
|
||||
; RV64ZVE32F-NEXT: j .LBB98_28
|
||||
; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 43
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
|
||||
; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load58
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 42
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_33
|
||||
; RV64ZVE32F-NEXT: j .LBB98_34
|
||||
; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load64
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 40
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
|
||||
; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load67
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 39
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
|
||||
; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load70
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 38
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
|
||||
; RV64ZVE32F-NEXT: j .LBB98_39
|
||||
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 35
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB98_43
|
||||
; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: lb a2, 0(a2)
|
||||
; RV64ZVE32F-NEXT: li a3, 32
|
||||
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, mu
|
||||
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 34
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB98_44
|
||||
; RV64ZVE32F-NEXT: j .LBB98_45
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
|
||||
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
|
||||
ret <32 x i8> %v
|
||||
|
|
|
@ -10842,10 +10842,10 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 8
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
|
||||
; RV64ZVE32F-NEXT: # %bb.7: # %else6
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
|
||||
; RV64ZVE32F-NEXT: .LBB91_8: # %else8
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
|
||||
|
@ -10863,13 +10863,13 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 64
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
|
||||
; RV64ZVE32F-NEXT: # %bb.11: # %else12
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_31
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
|
||||
; RV64ZVE32F-NEXT: .LBB91_12: # %else14
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_32
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
|
||||
; RV64ZVE32F-NEXT: .LBB91_13: # %else16
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
|
||||
|
@ -10894,34 +10894,16 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB91_17: # %else20
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 1
|
||||
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
|
||||
; RV64ZVE32F-NEXT: and a3, a1, a3
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 52
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
|
||||
; RV64ZVE32F-NEXT: beqz a3, .LBB91_19
|
||||
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
|
||||
; RV64ZVE32F-NEXT: add a3, a0, a3
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
|
||||
; RV64ZVE32F-NEXT: vse8.v v10, (a3)
|
||||
; RV64ZVE32F-NEXT: .LBB91_19: # %else22
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_21
|
||||
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
|
||||
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB91_21: # %else24
|
||||
; RV64ZVE32F-NEXT: lui a2, 2
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_23
|
||||
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
|
||||
; RV64ZVE32F-NEXT: # %bb.18: # %else22
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 51
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
|
||||
; RV64ZVE32F-NEXT: .LBB91_19: # %else24
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 50
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB91_21
|
||||
; RV64ZVE32F-NEXT: .LBB91_20: # %cond.store25
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
|
@ -10929,23 +10911,22 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13
|
||||
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB91_23: # %else26
|
||||
; RV64ZVE32F-NEXT: .LBB91_21: # %else26
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 4
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 49
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_25
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB91_23
|
||||
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store27
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
|
||||
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB91_25: # %else28
|
||||
; RV64ZVE32F-NEXT: .LBB91_23: # %else28
|
||||
; RV64ZVE32F-NEXT: lui a2, 1048568
|
||||
; RV64ZVE32F-NEXT: and a1, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a1, .LBB91_27
|
||||
; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
|
||||
; RV64ZVE32F-NEXT: beqz a1, .LBB91_25
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store29
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
|
||||
|
@ -10953,9 +10934,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
|
||||
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
|
||||
; RV64ZVE32F-NEXT: .LBB91_27: # %else30
|
||||
; RV64ZVE32F-NEXT: .LBB91_25: # %else30
|
||||
; RV64ZVE32F-NEXT: ret
|
||||
; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store5
|
||||
; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
|
||||
|
@ -10965,7 +10946,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
|
||||
; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store7
|
||||
; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store7
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -10975,7 +10956,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_9
|
||||
; RV64ZVE32F-NEXT: j .LBB91_10
|
||||
; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store11
|
||||
; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store11
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
|
@ -10983,7 +10964,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
|
||||
; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store13
|
||||
; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store13
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
|
@ -10993,7 +10974,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
|
||||
; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store15
|
||||
; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store15
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -11003,6 +10984,26 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
|
||||
; RV64ZVE32F-NEXT: j .LBB91_15
|
||||
; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
|
||||
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 51
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB91_19
|
||||
; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
|
||||
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 50
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB91_20
|
||||
; RV64ZVE32F-NEXT: j .LBB91_21
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs
|
||||
call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %val, <16 x i8*> %ptrs, i32 1, <16 x i1> %m)
|
||||
ret void
|
||||
|
@ -11075,10 +11076,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 8
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_60
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
|
||||
; RV64ZVE32F-NEXT: # %bb.7: # %else6
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 16
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_61
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
|
||||
; RV64ZVE32F-NEXT: .LBB92_8: # %else8
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 32
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
|
||||
|
@ -11096,13 +11097,13 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 64
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_62
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
|
||||
; RV64ZVE32F-NEXT: # %bb.11: # %else12
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 128
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_63
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
|
||||
; RV64ZVE32F-NEXT: .LBB92_12: # %else14
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 256
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_64
|
||||
; RV64ZVE32F-NEXT: bnez a2, .LBB92_54
|
||||
; RV64ZVE32F-NEXT: .LBB92_13: # %else16
|
||||
; RV64ZVE32F-NEXT: andi a2, a1, 512
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
|
||||
|
@ -11127,24 +11128,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_17: # %else20
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 1
|
||||
; RV64ZVE32F-NEXT: addiw a3, a2, -2048
|
||||
; RV64ZVE32F-NEXT: and a3, a1, a3
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 52
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
|
||||
; RV64ZVE32F-NEXT: beqz a3, .LBB92_19
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
|
||||
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a3, v13
|
||||
; RV64ZVE32F-NEXT: add a3, a0, a3
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11
|
||||
; RV64ZVE32F-NEXT: vse8.v v14, (a3)
|
||||
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_19: # %else22
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 51
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_21
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
|
||||
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
|
@ -11152,9 +11151,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 12
|
||||
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_21: # %else24
|
||||
; RV64ZVE32F-NEXT: lui a2, 2
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_23
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 50
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
|
||||
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
|
||||
|
@ -11165,44 +11163,19 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
|
|||
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_23: # %else26
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: lui a2, 4
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 49
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_25
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
|
||||
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_25: # %else28
|
||||
; RV64ZVE32F-NEXT: lui a2, 8
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_27
|
||||
; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
|
||||
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_27: # %else30
|
||||
; RV64ZVE32F-NEXT: lui a2, 16
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_29
|
||||
; RV64ZVE32F-NEXT: # %bb.28: # %cond.store31
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
|
||||
; RV64ZVE32F-NEXT: add a2, a0, a2
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
|
||||
; RV64ZVE32F-NEXT: .LBB92_29: # %else32
|
||||
; RV64ZVE32F-NEXT: lui a2, 32
|
||||
; RV64ZVE32F-NEXT: and a2, a1, a2
|
||||
; RV64ZVE32F-NEXT: beqz a2, .LBB92_31
|
||||
; RV64ZVE32F-NEXT: # %bb.30: # %cond.store33
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
|
||||
; RV64ZVE32F-NEXT: # %bb.24: # %else28
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 48
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
|
||||
; RV64ZVE32F-NEXT: .LBB92_25: # %else30
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 47
|
||||
; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
|
||||
; RV64ZVE32F-NEXT: .LBB92_26: # %else32
|
||||
; RV64ZVE32F-NEXT: slli a2, a1, 46
|
||||
; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
|
||||
; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
|
||||
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
|
||||
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
|
||||
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
|
||||
|
@ -11210,48 +11183,29 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_31: # %else34
; RV64ZVE32F-NEXT: .LBB92_28: # %else34
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 64
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_33
; RV64ZVE32F-NEXT: # %bb.32: # %cond.store35
; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
; RV64ZVE32F-NEXT: # %bb.29: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_33: # %else36
; RV64ZVE32F-NEXT: .LBB92_30: # %else36
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 128
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB92_35
; RV64ZVE32F-NEXT: # %bb.34: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_35: # %else38
; RV64ZVE32F-NEXT: lui a2, 256
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_37
; RV64ZVE32F-NEXT: # %bb.36: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_37: # %else40
; RV64ZVE32F-NEXT: lui a2, 512
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_39
; RV64ZVE32F-NEXT: # %bb.38: # %cond.store41
; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
; RV64ZVE32F-NEXT: # %bb.31: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
; RV64ZVE32F-NEXT: .LBB92_32: # %else40
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
; RV64ZVE32F-NEXT: .LBB92_33: # %cond.store41
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@ -11259,48 +11213,23 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 21
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_39: # %else42
; RV64ZVE32F-NEXT: .LBB92_34: # %else42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 1024
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_41: # %else44
; RV64ZVE32F-NEXT: lui a2, 2048
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_43
; RV64ZVE32F-NEXT: # %bb.42: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_43: # %else46
; RV64ZVE32F-NEXT: lui a2, 4096
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_45
; RV64ZVE32F-NEXT: # %bb.44: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_45: # %else48
; RV64ZVE32F-NEXT: lui a2, 8192
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_47
; RV64ZVE32F-NEXT: # %bb.46: # %cond.store49
; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
; RV64ZVE32F-NEXT: # %bb.35: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
; RV64ZVE32F-NEXT: .LBB92_36: # %else46
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
; RV64ZVE32F-NEXT: .LBB92_37: # %else48
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@ -11308,48 +11237,29 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_47: # %else50
; RV64ZVE32F-NEXT: .LBB92_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 16384
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_49
; RV64ZVE32F-NEXT: # %bb.48: # %cond.store51
; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
; RV64ZVE32F-NEXT: # %bb.40: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_49: # %else52
; RV64ZVE32F-NEXT: .LBB92_41: # %else52
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: lui a2, 32768
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB92_51
; RV64ZVE32F-NEXT: # %bb.50: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_51: # %else54
; RV64ZVE32F-NEXT: lui a2, 65536
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_53
; RV64ZVE32F-NEXT: # %bb.52: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_53: # %else56
; RV64ZVE32F-NEXT: lui a2, 131072
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_55
; RV64ZVE32F-NEXT: # %bb.54: # %cond.store57
; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
; RV64ZVE32F-NEXT: # %bb.42: # %else54
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
; RV64ZVE32F-NEXT: .LBB92_43: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB92_45
; RV64ZVE32F-NEXT: .LBB92_44: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@ -11357,23 +11267,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_55: # %else58
; RV64ZVE32F-NEXT: .LBB92_45: # %else58
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: lui a2, 262144
; RV64ZVE32F-NEXT: and a2, a1, a2
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_57
; RV64ZVE32F-NEXT: # %bb.56: # %cond.store59
; RV64ZVE32F-NEXT: bgez a2, .LBB92_47
; RV64ZVE32F-NEXT: # %bb.46: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_57: # %else60
; RV64ZVE32F-NEXT: .LBB92_47: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB92_59
; RV64ZVE32F-NEXT: # %bb.58: # %cond.store61
; RV64ZVE32F-NEXT: beqz a1, .LBB92_49
; RV64ZVE32F-NEXT: # %bb.48: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@ -11381,9 +11290,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB92_59: # %else62
; RV64ZVE32F-NEXT: .LBB92_49: # %else62
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store5
; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@ -11393,7 +11302,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store7
; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -11403,7 +11312,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB92_9
; RV64ZVE32F-NEXT: j .LBB92_10
; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store11
; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
@ -11411,7 +11320,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store13
; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@ -11421,7 +11330,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store15
; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@ -11431,6 +11340,102 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
; RV64ZVE32F-NEXT: j .LBB92_15
; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
; RV64ZVE32F-NEXT: j .LBB92_28
; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: bltz a2, .LBB92_33
; RV64ZVE32F-NEXT: j .LBB92_34
; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
; RV64ZVE32F-NEXT: j .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB92_43
; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB92_44
; RV64ZVE32F-NEXT: j .LBB92_45
  %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
  call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> %val, <32 x i8*> %ptrs, i32 1, <32 x i1> %m)
  ret void