[RISCV] Use getSplatBuildVector instead of getSplatVector for fixed vectors.

The splat_vector will be legalized to build_vector eventually
anyway. This patch makes it take fewer steps.
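
For fixed-length vectors the helper builds the BUILD_VECTOR up front. A
simplified sketch of what it does (illustration only, not the exact
in-tree SelectionDAG code):

    // Splat Op into every lane of the fixed-length vector type VT by
    // emitting the ISD::BUILD_VECTOR directly, rather than an
    // ISD::SPLAT_VECTOR that legalization would expand later.
    SDValue SelectionDAG::getSplatBuildVector(EVT VT, const SDLoc &DL,
                                              SDValue Op) {
      SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Op);
      return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
    }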

Unfortunately, this results in some codegen changes. They appear to
come down to how the nodes are ordered in the topological sort for
isel: because the build_vector is created earlier, we end up with a
different ordering of nodes.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D122185
commit 7417eb29ce
parent c28ce745cf
Author: Craig Topper
Date:   2022-03-30 11:33:35 -07:00

2 changed files with 55 additions and 31 deletions


@@ -2172,18 +2172,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
-        SDValue SplatStep = DAG.getSplatVector(
+        SDValue SplatStep = DAG.getSplatBuildVector(
             VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
         VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
       }
       if (StepDenominator != 1) {
-        SDValue SplatStep = DAG.getSplatVector(
+        SDValue SplatStep = DAG.getSplatBuildVector(
             VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
         VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
       }
       if (Addend != 0 || Negate) {
-        SDValue SplatAddend =
-            DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
+        SDValue SplatAddend = DAG.getSplatBuildVector(
+            VT, DL, DAG.getConstant(Addend, DL, XLenVT));
         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
       }
       return VID;
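
For reference, a rough scalar model of the VID-based sequence these splats
feed (hypothetical standalone helper, assuming the ISD::MUL step form; not
in-tree code): each lane is the addend plus or minus
((lane * step) >> log2(denominator)), which is why the <0,0,1,1> vectors in
the test below come out as vid.v followed by vsrl.vi ..., 1.

    #include <cstdint>

    // Hypothetical scalar model of what the DAG above computes lane-wise
    // (ISD::MUL step form):
    //   Negate ? Addend - ((Lane * Step) >> Log2Denom)
    //          : Addend + ((Lane * Step) >> Log2Denom)
    // With Step = 1, Log2Denom = 1, Addend = 0 this gives 0, 0, 1, 1 for
    // lanes 0..3, matching the vid.v + vsrl.vi pair in the test diff below.
    static int64_t vidSequenceLane(int64_t Lane, int64_t Step,
                                   unsigned Log2Denom, int64_t Addend,
                                   bool Negate) {
      int64_t V = (Lane * Step) >> Log2Denom;
      return Negate ? Addend - V : Addend + V;
    }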


@@ -537,29 +537,53 @@ define void @buildvec_seq_v4i16_v2i32(<4 x i16>* %x) {
 }
 
 define void @buildvec_vid_step1o2_v4i32(<4 x i32>* %z0, <4 x i32>* %z1, <4 x i32>* %z2, <4 x i32>* %z3, <4 x i32>* %z4, <4 x i32>* %z5, <4 x i32>* %z6) {
-; CHECK-LABEL: buildvec_vid_step1o2_v4i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vsrl.vi v8, v8, 1
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    vse32.v v8, (a1)
-; CHECK-NEXT:    vse32.v v8, (a2)
-; CHECK-NEXT:    vse32.v v8, (a3)
-; CHECK-NEXT:    vse32.v v8, (a4)
-; CHECK-NEXT:    vmv.s.x v8, zero
-; CHECK-NEXT:    vmv.v.i v9, 1
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; CHECK-NEXT:    vslideup.vi v9, v8, 1
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vse32.v v9, (a5)
-; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
-; CHECK-NEXT:    vslideup.vi v9, v8, 3
-; CHECK-NEXT:    vse32.v v9, (a6)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_step1o2_v4i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vsrl.vi v8, v8, 1
+; RV32-NEXT:    vse32.v v8, (a0)
+; RV32-NEXT:    vse32.v v8, (a1)
+; RV32-NEXT:    vmv.v.i v9, 1
+; RV32-NEXT:    vse32.v v8, (a2)
+; RV32-NEXT:    vse32.v v8, (a3)
+; RV32-NEXT:    vse32.v v8, (a4)
+; RV32-NEXT:    vmv.s.x v8, zero
+; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; RV32-NEXT:    vslideup.vi v9, v8, 1
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vse32.v v9, (a5)
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vmv.s.x v8, a0
+; RV32-NEXT:    vmv.v.i v9, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; RV32-NEXT:    vslideup.vi v9, v8, 3
+; RV32-NEXT:    vse32.v v9, (a6)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_step1o2_v4i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    vsrl.vi v8, v8, 1
+; RV64-NEXT:    vse32.v v8, (a0)
+; RV64-NEXT:    vmv.v.i v9, 1
+; RV64-NEXT:    vse32.v v8, (a1)
+; RV64-NEXT:    vse32.v v8, (a2)
+; RV64-NEXT:    vse32.v v8, (a3)
+; RV64-NEXT:    vse32.v v8, (a4)
+; RV64-NEXT:    vmv.s.x v8, zero
+; RV64-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; RV64-NEXT:    vslideup.vi v9, v8, 1
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT:    vse32.v v9, (a5)
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    vmv.v.i v9, 0
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; RV64-NEXT:    vslideup.vi v9, v8, 3
+; RV64-NEXT:    vse32.v v9, (a6)
+; RV64-NEXT:    ret
   store <4 x i32> <i32 0, i32 0, i32 1, i32 1>, <4 x i32>* %z0
   store <4 x i32> <i32 0, i32 0, i32 1, i32 undef>, <4 x i32>* %z1
   store <4 x i32> <i32 0, i32 undef, i32 1, i32 1>, <4 x i32>* %z2
@@ -580,20 +604,20 @@ define void @buildvec_vid_step1o2_add3_v4i16(<4 x i16>* %z0, <4 x i16>* %z1, <4
 ; CHECK-NEXT:    vsrl.vi v8, v8, 1
 ; CHECK-NEXT:    vadd.vi v8, v8, 3
 ; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    vse16.v v8, (a3)
 ; CHECK-NEXT:    vse16.v v8, (a4)
 ; CHECK-NEXT:    li a0, 3
 ; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v9, 4
+; CHECK-NEXT:    vmv.v.i v10, 4
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vse16.v v9, (a5)
+; CHECK-NEXT:    vse16.v v10, (a5)
 ; CHECK-NEXT:    li a0, 4
 ; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
 ; CHECK-NEXT:    vslideup.vi v9, v8, 3
 ; CHECK-NEXT:    vse16.v v9, (a6)