[RISCV] Use getSplatBuildVector instead of getSplatVector for fixed vectors.
The splat_vector will be legalized to build_vector eventually anyway. This patch makes it take fewer steps. Unfortunately, this results in some codegen changes. It looks like it comes down to how the nodes were ordered in the topological sort for isel. Because the build_vector is created earlier, we end up with a different ordering of nodes.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D122185
This commit is contained in:
parent
c28ce745cf
commit
7417eb29ce
|
@@ -2172,18 +2172,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
|||
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
|
||||
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
|
||||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
|
||||
SDValue SplatStep = DAG.getSplatVector(
|
||||
SDValue SplatStep = DAG.getSplatBuildVector(
|
||||
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
|
||||
VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
|
||||
}
|
||||
if (StepDenominator != 1) {
|
||||
SDValue SplatStep = DAG.getSplatVector(
|
||||
SDValue SplatStep = DAG.getSplatBuildVector(
|
||||
VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
|
||||
VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
|
||||
}
|
||||
if (Addend != 0 || Negate) {
|
||||
SDValue SplatAddend =
|
||||
DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
|
||||
SDValue SplatAddend = DAG.getSplatBuildVector(
|
||||
VT, DL, DAG.getConstant(Addend, DL, XLenVT));
|
||||
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
|
||||
}
|
||||
return VID;
|
||||
|
|
|
@@ -537,29 +537,53 @@ define void @buildvec_seq_v4i16_v2i32(<4 x i16>* %x) {
|
|||
}
|
||||
|
||||
define void @buildvec_vid_step1o2_v4i32(<4 x i32>* %z0, <4 x i32>* %z1, <4 x i32>* %z2, <4 x i32>* %z3, <4 x i32>* %z4, <4 x i32>* %z5, <4 x i32>* %z6) {
|
||||
; CHECK-LABEL: buildvec_vid_step1o2_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vsrl.vi v8, v8, 1
|
||||
; CHECK-NEXT: vse32.v v8, (a0)
|
||||
; CHECK-NEXT: vse32.v v8, (a1)
|
||||
; CHECK-NEXT: vse32.v v8, (a2)
|
||||
; CHECK-NEXT: vse32.v v8, (a3)
|
||||
; CHECK-NEXT: vse32.v v8, (a4)
|
||||
; CHECK-NEXT: vmv.s.x v8, zero
|
||||
; CHECK-NEXT: vmv.v.i v9, 1
|
||||
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
|
||||
; CHECK-NEXT: vslideup.vi v9, v8, 1
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vse32.v v9, (a5)
|
||||
; CHECK-NEXT: li a0, 1
|
||||
; CHECK-NEXT: vmv.s.x v8, a0
|
||||
; CHECK-NEXT: vmv.v.i v9, 0
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
|
||||
; CHECK-NEXT: vslideup.vi v9, v8, 3
|
||||
; CHECK-NEXT: vse32.v v9, (a6)
|
||||
; CHECK-NEXT: ret
|
||||
; RV32-LABEL: buildvec_vid_step1o2_v4i32:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; RV32-NEXT: vid.v v8
|
||||
; RV32-NEXT: vsrl.vi v8, v8, 1
|
||||
; RV32-NEXT: vse32.v v8, (a0)
|
||||
; RV32-NEXT: vse32.v v8, (a1)
|
||||
; RV32-NEXT: vmv.v.i v9, 1
|
||||
; RV32-NEXT: vse32.v v8, (a2)
|
||||
; RV32-NEXT: vse32.v v8, (a3)
|
||||
; RV32-NEXT: vse32.v v8, (a4)
|
||||
; RV32-NEXT: vmv.s.x v8, zero
|
||||
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, mu
|
||||
; RV32-NEXT: vslideup.vi v9, v8, 1
|
||||
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; RV32-NEXT: vse32.v v9, (a5)
|
||||
; RV32-NEXT: li a0, 1
|
||||
; RV32-NEXT: vmv.s.x v8, a0
|
||||
; RV32-NEXT: vmv.v.i v9, 0
|
||||
; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu
|
||||
; RV32-NEXT: vslideup.vi v9, v8, 3
|
||||
; RV32-NEXT: vse32.v v9, (a6)
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: buildvec_vid_step1o2_v4i32:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; RV64-NEXT: vid.v v8
|
||||
; RV64-NEXT: vsrl.vi v8, v8, 1
|
||||
; RV64-NEXT: vse32.v v8, (a0)
|
||||
; RV64-NEXT: vmv.v.i v9, 1
|
||||
; RV64-NEXT: vse32.v v8, (a1)
|
||||
; RV64-NEXT: vse32.v v8, (a2)
|
||||
; RV64-NEXT: vse32.v v8, (a3)
|
||||
; RV64-NEXT: vse32.v v8, (a4)
|
||||
; RV64-NEXT: vmv.s.x v8, zero
|
||||
; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, mu
|
||||
; RV64-NEXT: vslideup.vi v9, v8, 1
|
||||
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; RV64-NEXT: vse32.v v9, (a5)
|
||||
; RV64-NEXT: li a0, 1
|
||||
; RV64-NEXT: vmv.s.x v8, a0
|
||||
; RV64-NEXT: vmv.v.i v9, 0
|
||||
; RV64-NEXT: vsetvli zero, zero, e32, m1, tu, mu
|
||||
; RV64-NEXT: vslideup.vi v9, v8, 3
|
||||
; RV64-NEXT: vse32.v v9, (a6)
|
||||
; RV64-NEXT: ret
|
||||
store <4 x i32> <i32 0, i32 0, i32 1, i32 1>, <4 x i32>* %z0
|
||||
store <4 x i32> <i32 0, i32 0, i32 1, i32 undef>, <4 x i32>* %z1
|
||||
store <4 x i32> <i32 0, i32 undef, i32 1, i32 1>, <4 x i32>* %z2
|
||||
|
@@ -580,20 +604,20 @@ define void @buildvec_vid_step1o2_add3_v4i16(<4 x i16>* %z0, <4 x i16>* %z1, <4
|
|||
; CHECK-NEXT: vsrl.vi v8, v8, 1
|
||||
; CHECK-NEXT: vadd.vi v8, v8, 3
|
||||
; CHECK-NEXT: vse16.v v8, (a0)
|
||||
; CHECK-NEXT: vmv.v.i v9, 3
|
||||
; CHECK-NEXT: vse16.v v8, (a1)
|
||||
; CHECK-NEXT: vse16.v v8, (a2)
|
||||
; CHECK-NEXT: vse16.v v8, (a3)
|
||||
; CHECK-NEXT: vse16.v v8, (a4)
|
||||
; CHECK-NEXT: li a0, 3
|
||||
; CHECK-NEXT: vmv.s.x v8, a0
|
||||
; CHECK-NEXT: vmv.v.i v9, 4
|
||||
; CHECK-NEXT: vmv.v.i v10, 4
|
||||
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
|
||||
; CHECK-NEXT: vslideup.vi v9, v8, 1
|
||||
; CHECK-NEXT: vslideup.vi v10, v8, 1
|
||||
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vse16.v v9, (a5)
|
||||
; CHECK-NEXT: vse16.v v10, (a5)
|
||||
; CHECK-NEXT: li a0, 4
|
||||
; CHECK-NEXT: vmv.s.x v8, a0
|
||||
; CHECK-NEXT: vmv.v.i v9, 3
|
||||
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
|
||||
; CHECK-NEXT: vslideup.vi v9, v8, 3
|
||||
; CHECK-NEXT: vse16.v v9, (a6)
|
||||
|
|
Loading…
Reference in New Issue