[X86] combineConcatVectorOps - fold v4i64/v8x32 concat(broadcast(),broadcast()) -> permilps(concat())
Extend the existing v4f64 fold to handle v4i64/v8f32/v8i32 as well.

Fixes #58585
parent 3125a4dbc8
commit ed1b0da557
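For illustration only, here is a rough AVX intrinsics sketch of the shuffle shape this combine targets. The change itself operates on SelectionDAG nodes, not intrinsics, and the function names below are invented for the example; the point is that instead of broadcasting each 128-bit source and then concatenating, the concatenation is formed first and a single in-lane permute splats lane 0 of each half, matching the vinsertf128 + vpermilps sequences in the updated tests.

// Illustrative sketch only (hypothetical helper names); compile with -mavx.
#include <immintrin.h>

// Before the fold: broadcast lane 0 of each 128-bit source, then concatenate.
__m256 concat_of_broadcasts(__m128 a, __m128 b) {
  __m128 lo = _mm_shuffle_ps(a, a, 0x00); // splat a[0]
  __m128 hi = _mm_shuffle_ps(b, b, 0x00); // splat b[0]
  return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
}

// After the fold: concatenate first, then one in-lane permute of the ymm.
__m256 permute_of_concat(__m128 a, __m128 b) {
  __m256 cat = _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1);
  return _mm256_permute_ps(cat, 0x00); // vpermilps $0: splat element 0 per 128-bit half
}

Both functions produce {a0,a0,a0,a0,b0,b0,b0,b0}; the second form needs one shuffle on the 256-bit value instead of one per 128-bit source.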
@@ -54460,11 +54460,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     unsigned NumOps = Ops.size();
     switch (Op0.getOpcode()) {
     case X86ISD::VBROADCAST: {
-      if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [](SDValue Op) {
+      if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
             return Op.getOperand(0).getValueType().is128BitVector();
-          }))
-        return DAG.getNode(X86ISD::MOVDDUP, DL, VT,
-                           ConcatSubOperand(VT, Ops, 0));
+          })) {
+        if (VT == MVT::v4f64 || VT == MVT::v4i64)
+          return DAG.getNode(X86ISD::UNPCKL, DL, VT,
+                             ConcatSubOperand(VT, Ops, 0),
+                             ConcatSubOperand(VT, Ops, 0));
+        // TODO: Add pseudo v8i32 PSHUFD handling to AVX1Only targets.
+        if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
+          return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI
+                                              : X86ISD::PSHUFD,
+                             DL, VT, ConcatSubOperand(VT, Ops, 0),
+                             getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
+      }
       break;
     }
     case X86ISD::MOVDDUP:
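A side note on the immediate: assuming the standard PSHUFD/VPERMILPS encoding of two bits per destination lane (which, as far as I understand, is what getV4X86ShuffleImm8ForMask builds), the {0, 0, 0, 0} mask above encodes to 0x00, which is why the updated tests below show a per-128-bit-lane splat of element 0 (ymm mask [0,0,0,0,4,4,4,4]). A standalone sketch of that encoding, not LLVM code:

// Sketch of the 2-bits-per-lane shuffle immediate used by PSHUFD/VPERMILPS.
#include <cstdint>
#include <cstdio>

static uint8_t shuffleImm8(const int (&Mask)[4]) {
  uint8_t Imm = 0;
  for (int I = 0; I != 4; ++I)
    Imm |= (Mask[I] & 0x3) << (I * 2); // destination lane I reads source element Mask[I]
  return Imm;
}

int main() {
  const int Splat0[4] = {0, 0, 0, 0};
  std::printf("0x%02x\n", shuffleImm8(Splat0)); // prints 0x00
  return 0;
}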
@@ -1550,16 +1550,16 @@ define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ;
 ; AVX2-LABEL: shuffle_v4i64_0044_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
 ; AVX512VL-NEXT:    retq
   %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
@@ -639,19 +639,12 @@ define <8 x float> @shuffle_v8f32_00224466_v4f32(<4 x float> %a, <4 x float> %b)
 }
 
 define <8 x float> @shuffle_v8f32_00004444_v4f32(<4 x float> %a, <4 x float> %b) {
-; AVX1-LABEL: shuffle_v8f32_00004444_v4f32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT:    retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00004444_v4f32:
-; AVX2OR512VL:       # %bb.0:
-; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %xmm0
-; AVX2OR512VL-NEXT:    vbroadcastss %xmm1, %xmm1
-; AVX2OR512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v8f32_00004444_v4f32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; ALL-NEXT:    retq
   %1 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x float> %1
 }
@@ -3289,19 +3282,12 @@ define <8 x i32> @shuffle_v8i32_32107654_v4i32(<4 x i32> %a, <4 x i32> %b) {
 }
 
 define <8 x i32> @shuffle_v8i32_00004444_v4f32(<4 x i32> %a, <4 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_00004444_v4f32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT:    retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00004444_v4f32:
-; AVX2OR512VL:       # %bb.0:
-; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %xmm0
-; AVX2OR512VL-NEXT:    vbroadcastss %xmm1, %xmm1
-; AVX2OR512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v8i32_00004444_v4f32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; ALL-NEXT:    retq
   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i32> %1
 }