[X86] combineScalarAndWithMaskSetcc - optionally peek through (oneuse) any_extend node

Extend the combine to also handle the pattern: (and (any_extend (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef, ...)))), C)

Fixes several regressions identified in D127115.
This commit is contained in:
Simon Pilgrim 2022-11-24 16:26:29 +00:00
parent b0d4045dab
commit dbe2f44316
3 changed files with 29 additions and 18 deletions

View File

@ -48610,20 +48610,31 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
// Make sure this is an AND with constant. We will check the value of the
// constant later.
if (!isa<ConstantSDNode>(N->getOperand(1)))
auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C1)
return SDValue();
// This is implied by the ConstantSDNode.
assert(!VT.isVector() && "Expected scalar VT!");
if (N->getOperand(0).getOpcode() != ISD::BITCAST ||
!N->getOperand(0).hasOneUse() ||
!N->getOperand(0).getOperand(0).hasOneUse())
SDValue Src = N->getOperand(0);
if (!Src.hasOneUse())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Src = N->getOperand(0).getOperand(0);
// (Optionally) peek through any_extend().
if (Src.getOpcode() == ISD::ANY_EXTEND) {
if (!Src.getOperand(0).hasOneUse())
return SDValue();
Src = Src.getOperand(0);
}
if (Src.getOpcode() != ISD::BITCAST || !Src.getOperand(0).hasOneUse())
return SDValue();
Src = Src.getOperand(0);
EVT SrcVT = Src.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 ||
!TLI.isTypeLegal(SrcVT))
return SDValue();
@ -48639,7 +48650,7 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
// First subvector should be a setcc with a legal result type. The RHS of the
// AND should be a mask with this many bits.
if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) ||
!N->getConstantOperandAPInt(1).isMask(SubVecVT.getVectorNumElements()))
!C1->getAPIntValue().isMask(SubVecVT.getVectorNumElements()))
return SDValue();
EVT SetccVT = SubVec.getOperand(0).getValueType();
@ -48658,7 +48669,8 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
Ops[0] = SubVec;
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
Ops);
return DAG.getBitcast(VT, Concat);
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getSizeInBits());
return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT);
}
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,

View File

@ -57,7 +57,6 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: kmovw %k1, %eax
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq

View File

@ -53,7 +53,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $3, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: retq
%a = trunc <2 x i64> %0 to <2 x i1>
@ -103,7 +103,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: retq
%a = trunc <4 x i32> %0 to <4 x i1>
@ -248,7 +248,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -956,7 +956,7 @@ define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $3, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: retq
%a = icmp eq <2 x i64> %0, zeroinitializer
@ -1007,7 +1007,7 @@ define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: retq
%a = icmp eq <4 x i32> %0, zeroinitializer
@ -1198,7 +1198,7 @@ define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1897,7 +1897,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $3, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: retq
%a = icmp eq <2 x i64> %0, %1
@ -1948,7 +1948,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: retq
%a = icmp eq <4 x i32> %0, %1
@ -2134,7 +2134,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq