[X86] combineConcatVectorOps - fold v4i64/v8x32 concat(broadcast(),broadcast()) -> permilps(concat())

Extend the existing v4f64 fold to handle v4i64/v8f32/v8i32 as well

Fixes #58585
Simon Pilgrim 2022-10-25 15:34:23 +01:00
parent 3125a4dbc8
commit ed1b0da557
3 changed files with 29 additions and 34 deletions
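
As a sanity check, here is a standalone C++ model of the v8f32 case named in the title (not part of the patch; the helper names are invented, but the per-128-bit-lane VPERMILPS semantics and the 0x00 immediate encoded by getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, ...) are the standard ones). Splatting element 0 within each 128-bit lane of concat(a, b) reproduces concat(broadcast(a), broadcast(b)), so two broadcasts plus a concat can become one concat plus one in-lane shuffle:

#include <array>
#include <cassert>

using V4 = std::array<float, 4>;
using V8 = std::array<float, 8>;

// X86ISD::VBROADCAST with a 128-bit source: splat element 0.
static V4 broadcast(const V4 &Src) { return {Src[0], Src[0], Src[0], Src[0]}; }

// concat_vectors of two 128-bit halves into a 256-bit vector.
static V8 concat(const V4 &Lo, const V4 &Hi) {
  return {Lo[0], Lo[1], Lo[2], Lo[3], Hi[0], Hi[1], Hi[2], Hi[3]};
}

// VPERMILPS ymm, imm8: the same four 2-bit indices are applied within
// each 128-bit lane independently.
static V8 permilps(const V8 &Src, unsigned Imm) {
  V8 Res{};
  for (unsigned Lane = 0; Lane != 2; ++Lane)
    for (unsigned I = 0; I != 4; ++I)
      Res[Lane * 4 + I] = Src[Lane * 4 + ((Imm >> (2 * I)) & 3)];
  return Res;
}

int main() {
  V4 A = {0, 1, 2, 3}, B = {4, 5, 6, 7};
  // Before: broadcast each 128-bit source, then concatenate.
  V8 Old = concat(broadcast(A), broadcast(B));
  // After: concatenate once, then a single in-lane splat of element 0
  // (immediate 0x00, i.e. the {0, 0, 0, 0} mask).
  V8 New = permilps(concat(A, B), 0x00);
  assert(Old == New); // both give {0, 0, 0, 0, 4, 4, 4, 4}
  return 0;
}

The same lane-wise argument covers v8i32 via PSHUFD once AVX2 integer shuffles are available, which is the Subtarget.hasInt256() guard in the patch below.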


@@ -54460,11 +54460,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     unsigned NumOps = Ops.size();
     switch (Op0.getOpcode()) {
     case X86ISD::VBROADCAST: {
-      if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [](SDValue Op) {
+      if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
             return Op.getOperand(0).getValueType().is128BitVector();
-          }))
-        return DAG.getNode(X86ISD::MOVDDUP, DL, VT,
-                           ConcatSubOperand(VT, Ops, 0));
+          })) {
+        if (VT == MVT::v4f64 || VT == MVT::v4i64)
+          return DAG.getNode(X86ISD::UNPCKL, DL, VT,
+                             ConcatSubOperand(VT, Ops, 0),
+                             ConcatSubOperand(VT, Ops, 0));
+        // TODO: Add pseudo v8i32 PSHUFD handling to AVX1Only targets.
+        if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
+          return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI
+                                              : X86ISD::PSHUFD,
+                             DL, VT, ConcatSubOperand(VT, Ops, 0),
+                             getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
+      }
       break;
     }
     case X86ISD::MOVDDUP:
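
The v4f64/v4i64 branch above uses UNPCKL instead of an immediate shuffle. A similar standalone sketch (again not LLVM code; helper names are invented, the per-lane unpack-low semantics of VUNPCKLPD/VPUNPCKLQDQ are the standard ones) shows why unpacking concat(a, b) with itself matches concatenating the two broadcasts:

#include <array>
#include <cassert>
#include <cstdint>

using V2 = std::array<std::uint64_t, 2>;
using V4 = std::array<std::uint64_t, 4>;

// X86ISD::VBROADCAST with a 128-bit source: splat element 0.
static V2 broadcast(const V2 &Src) { return {Src[0], Src[0]}; }

// concat_vectors of two 128-bit halves into a 256-bit vector.
static V4 concat(const V2 &Lo, const V2 &Hi) {
  return {Lo[0], Lo[1], Hi[0], Hi[1]};
}

// VPUNPCKLQDQ/VUNPCKLPD ymm: per 128-bit lane, interleave the low
// 64-bit elements of the two sources.
static V4 unpckl(const V4 &A, const V4 &B) {
  return {A[0], B[0], A[2], B[2]};
}

int main() {
  V2 A = {1, 2}, B = {3, 4};
  // Before: two broadcasts, then concatenate.
  V4 Old = concat(broadcast(A), broadcast(B));
  // After: concatenate once, then unpack-low the result with itself.
  V4 New = unpckl(concat(A, B), concat(A, B));
  assert(Old == New); // both give {1, 1, 3, 3}
  return 0;
}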


@@ -1550,16 +1550,16 @@ define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ;
 ; AVX2-LABEL: shuffle_v4i64_0044_v2i64:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
 ; AVX2-NEXT: retq
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX512VL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
 ; AVX512VL-NEXT: retq
   %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 0, i32 0>


@@ -639,19 +639,12 @@ define <8 x float> @shuffle_v8f32_00224466_v4f32(<4 x float> %a, <4 x float> %b)
 }
 
 define <8 x float> @shuffle_v8f32_00004444_v4f32(<4 x float> %a, <4 x float> %b) {
-; AVX1-LABEL: shuffle_v8f32_00004444_v4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00004444_v4f32:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_00004444_v4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; ALL-NEXT: retq
   %1 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x float> %1
 }
@@ -3289,19 +3282,12 @@ define <8 x i32> @shuffle_v8i32_32107654_v4i32(<4 x i32> %a, <4 x i32> %b) {
 }
 
 define <8 x i32> @shuffle_v8i32_00004444_v4f32(<4 x i32> %a, <4 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_00004444_v4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00004444_v4f32:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_00004444_v4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; ALL-NEXT: retq
   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i32> %1
 }