[SelectionDAG] computeKnownBits - support constant pool values from target
This patch adds the overridable TargetLowering::getTargetConstantFromLoad function, which allows targets to return any constant value loaded by a LoadSDNode - only X86 makes use of this so far, but everything should be in place for other targets.

computeKnownBits then uses this function to improve codegen, notably vector code after legalization.

A future commit will do the same for ComputeNumSignBits, but computeKnownBits sees the bigger benefit.

This required a couple of fixes:

* SimplifyDemandedBits must early-out for getTargetConstantFromLoad cases to prevent infinite loops of constant regeneration (similar to what we already do for BUILD_VECTOR).
* Fix a DAGCombiner::visitTRUNCATE issue: we had trunc(shl(v8i32),v8i16) <-> shl(trunc(v8i16),v8i32) infinite loops after legalization on AVX512 targets.

Differential Revision: https://reviews.llvm.org/D61887

llvm-svn: 361620
commit 95b8d9bbf8
parent 980f760515
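As a rough illustration of the new hook described above, here is a minimal sketch of how a hypothetical target might override getTargetConstantFromLoad when its constant-pool loads use a plain ConstantPoolSDNode base pointer (the class name MyTargetLowering is made up; the X86 override in this patch instead routes through its existing getTargetConstantFromNode helper):

const Constant *
MyTargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
  // Only IR-level constant pool entries carry a Constant we can hand back;
  // machine constant pool entries do not.
  if (auto *CP = dyn_cast<ConstantPoolSDNode>(LD->getBasePtr()))
    if (!CP->isMachineConstantPoolEntry())
      return CP->getConstVal();
  return nullptr;
}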
@@ -3119,6 +3119,10 @@ public:
                                    TargetLoweringOpt &TLO,
                                    unsigned Depth = 0) const;

  /// This method returns the constant pool value that will be loaded by LD.
  /// NOTE: You must check for implicit extensions of the constant by LD.
  virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;

  /// If \p SNaN is false, \returns true if \p Op is known to never be any
  /// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
  /// NaN.
@@ -10110,7 +10110,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
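A self-contained model (values chosen only for illustration) of the guard this hunk tightens: the fold trunc(shl x, K) -> shl(trunc x), K is only safe when every possible shift amount is below the narrow type's bit width, which is what the computeKnownBits query on Amt establishes.

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned NarrowBits = 16;     // e.g. truncating i32 -> i16
  // Pretend computeKnownBits proved bits [4..31] of the shift amount are zero.
  uint32_t KnownZeroMask = ~0xFu;
  uint32_t MaxPossibleAmt = ~KnownZeroMask; // 15: every unknown bit set
  bool FoldIsSafe = MaxPossibleAmt < NarrowBits;
  std::printf("max shift amount = %u, fold is safe: %s\n", MaxPossibleAmt,
              FoldIsSafe ? "yes" : "no");
  return 0;
}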
@@ -2886,8 +2886,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
    if (ISD::isNON_EXTLoad(LD) && Cst) {
      // Determine any common known bits from the loaded constant pool value.
      Type *CstTy = Cst->getType();
      if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
        // If its a vector splat, then we can (quickly) reuse the scalar path.
        // NOTE: We assume all elements match and none are UNDEF.
        if (CstTy->isVectorTy()) {
          if (const Constant *Splat = Cst->getSplatValue()) {
            Cst = Splat;
            CstTy = Cst->getType();
          }
        }
        // TODO - do we need to handle different bitwidths?
        if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
          // Iterate across all vector elements finding common known bits.
          Known.One.setAllBits();
          Known.Zero.setAllBits();
          for (unsigned i = 0; i != NumElts; ++i) {
            if (!DemandedElts[i])
              continue;
            if (Constant *Elt = Cst->getAggregateElement(i)) {
              if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
                const APInt &Value = CInt->getValue();
                Known.One &= Value;
                Known.Zero &= ~Value;
                continue;
              }
              if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
                APInt Value = CFP->getValueAPF().bitcastToAPInt();
                Known.One &= Value;
                Known.Zero &= ~Value;
                continue;
              }
            }
            Known.One.clearAllBits();
            Known.Zero.clearAllBits();
            break;
          }
        } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
          if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
            const APInt &Value = CInt->getValue();
            Known.One = Value;
            Known.Zero = ~Value;
          } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
            APInt Value = CFP->getValueAPF().bitcastToAPInt();
            Known.One = Value;
            Known.Zero = ~Value;
          }
        }
      }
    } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      EVT VT = LD->getMemoryVT();
      unsigned MemBits = VT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
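A simplified, standalone model of what the new block above computes for a vector constant-pool load: intersect the bits of every demanded element, so only the bits that agree across all of them end up known (the constant values here are invented for the example):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint16_t> ConstPool = {0x00F3, 0x00F5, 0x00F9}; // hypothetical <3 x i16> constant
  std::vector<bool> Demanded = {true, true, true};

  uint16_t KnownOne = 0xFFFF, KnownZero = 0xFFFF;
  for (size_t i = 0; i != ConstPool.size(); ++i) {
    if (!Demanded[i])
      continue;
    KnownOne &= ConstPool[i];   // bits set in every demanded element
    KnownZero &= ~ConstPool[i]; // bits clear in every demanded element
  }
  std::printf("KnownOne  = 0x%04x\n", KnownOne);  // 0x00f1
  std::printf("KnownZero = 0x%04x\n", KnownZero); // 0xff00
  return 0;
}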
@@ -659,6 +659,14 @@ bool TargetLowering::SimplifyDemandedBits(
      Known.Zero &= Known2.Zero;
    }
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
@@ -2314,6 +2322,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
  return false;
}

const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}

bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
@@ -5731,10 +5731,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
  return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}

static const Constant *getTargetConstantFromNode(SDValue Op) {
  Op = peekThroughBitcasts(Op);

  auto *Load = dyn_cast<LoadSDNode>(Op);
static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
  if (!Load)
    return nullptr;
@@ -5750,6 +5747,17 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
  return CNode->getConstVal();
}

static const Constant *getTargetConstantFromNode(SDValue Op) {
  Op = peekThroughBitcasts(Op);
  return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
}

const Constant *
X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
  assert(LD && "Unexpected null LoadSDNode");
  return getTargetConstantFromNode(LD);
}

// Extract raw constant bits from constant pools.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
                                          APInt &UndefElts,
@@ -908,6 +908,8 @@ namespace llvm {
                                   TargetLoweringOpt &TLO,
                                   unsigned Depth) const override;

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
@@ -940,9 +940,8 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; AVX512-LABEL: test46:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3]
; AVX512-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
@@ -61,31 +61,25 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
; X64-NEXT: packuswb %xmm2, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: pand %xmm0, %xmm2
; X64-NEXT: psllw $4, %xmm2
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X64-NEXT: packuswb %xmm2, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllw $4, %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm1
; X64-NEXT: psrlw $4, %xmm1
; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: pandn %xmm2, %xmm0
; X64-NEXT: psrlw $4, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: psllw $2, %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: psrlw $2, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: paddb %xmm1, %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: psrlq $48, %xmm0
; X64-NEXT: retq
@@ -47,31 +47,25 @@ define <4 x i32> @test_demandedbits_bitreverse(<4 x i32> %a0) nounwind {
; X86-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; X86-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X86-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT: packuswb %xmm2, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: pand %xmm0, %xmm2
; X86-NEXT: psllw $4, %xmm2
; X86-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT: packuswb %xmm2, %xmm0
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psllw $4, %xmm1
; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
; X86-NEXT: psrlw $4, %xmm1
; X86-NEXT: pand %xmm0, %xmm1
; X86-NEXT: pandn %xmm2, %xmm0
; X86-NEXT: psrlw $4, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT: pand %xmm0, %xmm1
; X86-NEXT: psllw $2, %xmm1
; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: psrlw $2, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-NEXT: pand %xmm0, %xmm1
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: psrlw $1, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
@@ -268,16 +268,11 @@ define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_ext_shl1:
; SSE2: # %bb.0:
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pslld $29, %xmm2
; SSE2-NEXT: pslld $28, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: pslld $30, %xmm0
; SSE2-NEXT: xorpd %xmm2, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_ext_shl1:
@@ -1309,7 +1309,6 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx

@@ -1368,7 +1367,6 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
@@ -1432,8 +1430,6 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; AVX1-LABEL: allones_v64i8_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0

@@ -1441,7 +1437,6 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -1518,8 +1513,6 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
; AVX1-LABEL: allzeros_v64i8_and1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0

@@ -1527,7 +1520,6 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2728,7 +2720,6 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx

@@ -2787,7 +2778,6 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
@@ -2851,8 +2841,6 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
; AVX1-LABEL: allones_v64i8_and4:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0

@@ -2860,7 +2848,6 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
@@ -2937,8 +2924,6 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
; AVX1-LABEL: allzeros_v64i8_and4:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0

@@ -2946,7 +2931,6 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
File diff suppressed because it is too large
@@ -414,10 +414,9 @@ define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -339,9 +339,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; SSE2-LABEL: var_shift_v8i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: psllw $12, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psraw $15, %xmm0

@@ -505,9 +503,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; X32-SSE-LABEL: var_shift_v8i8:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; X32-SSE-NEXT: pand %xmm0, %xmm2
; X32-SSE-NEXT: pand %xmm0, %xmm1
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: psllw $12, %xmm1
; X32-SSE-NEXT: movdqa %xmm1, %xmm0
; X32-SSE-NEXT: psraw $15, %xmm0
@@ -1122,11 +1118,9 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v8i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-NEXT: psllw $12, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psraw $15, %xmm0

@@ -1287,11 +1281,9 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; X32-SSE-LABEL: splatvar_shift_v8i8:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; X32-SSE-NEXT: pand %xmm0, %xmm2
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: pand %xmm0, %xmm1
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; X32-SSE-NEXT: psllw $12, %xmm1
; X32-SSE-NEXT: movdqa %xmm1, %xmm0
; X32-SSE-NEXT: psraw $15, %xmm0
@@ -1660,9 +1660,7 @@ define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]