[X86] Fold scalar_to_vector(i64 zext(x)) -> bitcast(vzext_movl(scalar_to_vector(i32 x)))
Extends the existing any-extend fold to make use of the implicit zero-extension of the movd instruction. This also helps replace some nasty xmm->gpr->xmm traffic with a shuffle pattern instead. Noticed while looking at D130953.
commit 5ca7754144
parent daf067da04
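The rewrite, sketched in SelectionDAG notation (node numbers are illustrative, not taken from a real -debug dump; t0 is the original i32 value):

    t1: i64 = zero_extend t0
    t2: v2i64 = scalar_to_vector t1
  -->
    t3: v4i32 = scalar_to_vector t0
    t4: v4i32 = X86ISD::VZEXT_MOVL t3
    t5: v2i64 = bitcast t4

VZEXT_MOVL keeps the low element and zeroes the remaining elements, so the zero-extension semantics are preserved without leaving the vector unit. In the test diffs below this shows up either as an xmm->gpr->xmm round trip collapsing into a single movd (e.g. bitcast_crash: movl %edi, %eax + movq %rax, %xmm1 becomes movd %edi, %xmm1) or as the whole sequence becoming a shuffle (e.g. extract3_i32_zext_insert1_i64_zero: extractps/movq/pslldq becomes psrldq + pslldq).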
@@ -55036,25 +55036,37 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
                        Src.getOperand(1));
 
-  // Reduce v2i64 to v4i32 if we don't need the upper bits.
+  // Reduce v2i64 to v4i32 if we don't need the upper bits or are known zero.
   // TODO: Move to DAGCombine/SimplifyDemandedBits?
-  if (VT == MVT::v2i64 || VT == MVT::v2f64) {
-    auto IsAnyExt64 = [](SDValue Op) {
-      if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
+  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
+    auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
+      if (Op.getValueType() != MVT::i64)
         return SDValue();
-      if (Op.getOpcode() == ISD::ANY_EXTEND &&
+      unsigned Opc = IsZeroExt ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND;
+      if (Op.getOpcode() == Opc &&
           Op.getOperand(0).getScalarValueSizeInBits() <= 32)
         return Op.getOperand(0);
+      unsigned Ext = IsZeroExt ? ISD::ZEXTLOAD : ISD::EXTLOAD;
       if (auto *Ld = dyn_cast<LoadSDNode>(Op))
-        if (Ld->getExtensionType() == ISD::EXTLOAD &&
+        if (Ld->getExtensionType() == Ext &&
            Ld->getMemoryVT().getScalarSizeInBits() <= 32)
          return Op;
+      if (IsZeroExt && DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32)))
+        return Op;
      return SDValue();
    };
-    if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
+
+    if (SDValue AnyExt = IsExt64(peekThroughOneUseBitcasts(Src), false))
      return DAG.getBitcast(
          VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
-                          DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
+                          DAG.getAnyExtOrTrunc(AnyExt, DL, MVT::i32)));
+
+    if (SDValue ZeroExt = IsExt64(peekThroughOneUseBitcasts(Src), true))
+      return DAG.getBitcast(
+          VT,
+          DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v4i32,
+                      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+                                  DAG.getZExtOrTrunc(ZeroExt, DL, MVT::i32))));
  }
 
  // Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
@@ -198,17 +198,25 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 }
 
 define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE-LABEL: extract0_i32_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    retq
 ;
+; SSE41-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: extract0_i32_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 0
@@ -242,24 +250,18 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
 ; SSE2-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
+; SSE2-NEXT:    psrlq $32, %xmm0
 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $1, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
-; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $1, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 1
   %z = zext i32 %e to i64
@@ -330,25 +332,15 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 }
 
 define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE2-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movq %rax, %xmm0
-; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    extractps $3, %xmm0, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
-; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE41-NEXT:    retq
+; SSE-LABEL: extract3_i32_zext_insert1_i64_zero:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extract3_i32_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vextractps $3, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT:    retq
   %e = extractelement <4 x i32> %x, i32 3
@@ -538,17 +530,22 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract0_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $0, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
 ;
+; SSE41-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: extract0_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $0, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 0
@@ -581,18 +578,21 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract1_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $1, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pextrw $1, %xmm0, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
 ;
+; SSE41-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: extract1_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 1
   %z = zext i16 %e to i64
@@ -628,18 +628,21 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract2_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $2, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
 ;
+; SSE41-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: extract2_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 2
   %z = zext i16 %e to i64
@@ -674,18 +677,20 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 
 define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract3_i16_zext_insert1_i64_zero:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pextrw $3, %xmm0, %eax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT:    retq
+; SSE2-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    psrlq $48, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT:    retq
 ;
+; SSE41-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
 ; AVX-LABEL: extract3_i16_zext_insert1_i64_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX-NEXT:    vmovq %rax, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    retq
   %e = extractelement <8 x i16> %x, i32 3
   %z = zext i16 %e to i64
@@ -332,8 +332,7 @@ declare dso_local i32 @foo(i32, i32, i32, i32)
 define <8 x i32> @PR49658_zext(ptr %ptr, i32 %mul) {
 ; SSE-LABEL: PR49658_zext:
 ; SSE:       # %bb.0: # %start
-; SSE-NEXT:    movl %esi, %eax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %esi, %xmm0
 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
 ; SSE-NEXT:    pxor %xmm0, %xmm0
 ; SSE-NEXT:    movq $-2097152, %rax # imm = 0xFFE00000
@@ -15,8 +15,7 @@
 define <8 x i16> @bitcast_crash(i32 %arg, <8 x i16> %x, i1 %c) {
 ; CHECK-LABEL: bitcast_crash:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    movd %edi, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; CHECK-NEXT:    testb $1, %sil
 ; CHECK-NEXT:    je .LBB0_2
@@ -149,11 +149,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; LIN-SSE2-NEXT:    andl %ecx, %edx
 ; LIN-SSE2-NEXT:    andl %ecx, %esi
 ; LIN-SSE2-NEXT:    andl %ecx, %edi
-; LIN-SSE2-NEXT:    movq %rax, %xmm0
-; LIN-SSE2-NEXT:    movq %rdx, %xmm1
+; LIN-SSE2-NEXT:    movd %eax, %xmm0
+; LIN-SSE2-NEXT:    movd %edx, %xmm1
 ; LIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE2-NEXT:    movq %rdi, %xmm2
-; LIN-SSE2-NEXT:    movq %rsi, %xmm1
+; LIN-SSE2-NEXT:    movd %edi, %xmm2
+; LIN-SSE2-NEXT:    movd %esi, %xmm1
 ; LIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; LIN-SSE2-NEXT:    retq
 ;
@@ -169,11 +169,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; LIN-SSE4-NEXT:    andl %ecx, %edx
 ; LIN-SSE4-NEXT:    andl %ecx, %esi
 ; LIN-SSE4-NEXT:    andl %ecx, %edi
-; LIN-SSE4-NEXT:    movq %rdx, %xmm1
-; LIN-SSE4-NEXT:    movq %rax, %xmm0
+; LIN-SSE4-NEXT:    movd %edx, %xmm1
+; LIN-SSE4-NEXT:    movd %eax, %xmm0
 ; LIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE4-NEXT:    movq %rdi, %xmm2
-; LIN-SSE4-NEXT:    movq %rsi, %xmm1
+; LIN-SSE4-NEXT:    movd %edi, %xmm2
+; LIN-SSE4-NEXT:    movd %esi, %xmm1
 ; LIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; LIN-SSE4-NEXT:    retq
 ;
@@ -192,11 +192,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; WIN-SSE2-NEXT:    andl %r9d, %ecx
 ; WIN-SSE2-NEXT:    andl %r9d, %edx
 ; WIN-SSE2-NEXT:    andl %r9d, %r8d
-; WIN-SSE2-NEXT:    movq %rax, %xmm0
-; WIN-SSE2-NEXT:    movq %rcx, %xmm1
+; WIN-SSE2-NEXT:    movd %eax, %xmm0
+; WIN-SSE2-NEXT:    movd %ecx, %xmm1
 ; WIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE2-NEXT:    movq %r8, %xmm2
-; WIN-SSE2-NEXT:    movq %rdx, %xmm1
+; WIN-SSE2-NEXT:    movd %r8d, %xmm2
+; WIN-SSE2-NEXT:    movd %edx, %xmm1
 ; WIN-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; WIN-SSE2-NEXT:    retq
 ;
@@ -212,11 +212,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; WIN-SSE4-NEXT:    andl %r9d, %ecx
 ; WIN-SSE4-NEXT:    andl %r9d, %edx
 ; WIN-SSE4-NEXT:    andl %r9d, %r8d
-; WIN-SSE4-NEXT:    movq %rcx, %xmm1
-; WIN-SSE4-NEXT:    movq %rax, %xmm0
+; WIN-SSE4-NEXT:    movd %ecx, %xmm1
+; WIN-SSE4-NEXT:    movd %eax, %xmm0
 ; WIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE4-NEXT:    movq %r8, %xmm2
-; WIN-SSE4-NEXT:    movq %rdx, %xmm1
+; WIN-SSE4-NEXT:    movd %r8d, %xmm2
+; WIN-SSE4-NEXT:    movd %edx, %xmm1
 ; WIN-SSE4-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; WIN-SSE4-NEXT:    retq
 ;
@@ -1101,8 +1101,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movapd %xmm0, %xmm2
 ; SSE41-NEXT:    movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT:    movl %edi, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
+; SSE41-NEXT:    movd %edi, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
@@ -1112,8 +1111,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
 ; AVX1-LABEL: arg_f64_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT:    movl %edi, %eax
-; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovd %edi, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
@@ -1458,8 +1456,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movapd %xmm0, %xmm1
 ; SSE41-NEXT:    movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT:    movl %esi, %eax
-; SSE41-NEXT:    movq %rax, %xmm0
+; SSE41-NEXT:    movd %esi, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT:    pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
@@ -1469,8 +1466,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
 ; AVX1-LABEL: load_f64_v2f64:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT:    movl %esi, %eax
-; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovd %esi, %xmm2
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
@@ -255,7 +255,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; SSE-NEXT:    movzbl (%rdi), %ecx
 ; SSE-NEXT:    movl $42, %eax
 ; SSE-NEXT:    shrq %cl, %rax
-; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: lshr_op0_constant:
@@ -263,7 +263,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; AVX-NEXT:    movzbl (%rdi), %ecx
 ; AVX-NEXT:    movl $42, %eax
 ; AVX-NEXT:    shrq %cl, %rax
-; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %x = load i64, ptr %p
   %b = lshr i64 42, %x
@@ -14,8 +14,7 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
 ;
 ; X64-LABEL: mmx_movzl:
 ; X64:       ## %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    movq %rax, %xmm0
+; X64-NEXT:    movaps {{.*#+}} xmm0 = [32,0,0,0]
 ; X64-NEXT:    retq
   %tmp = bitcast x86_mmx %x to <2 x i32>
   %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
@@ -38,7 +38,7 @@ define <2 x i64> @test2(i64 %arg) nounwind {
 ; X64-LABEL: test2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    andl $1234567, %edi # imm = 0x12D687
-; X64-NEXT:    movq %rdi, %xmm0
+; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    retq
   %A = and i64 %arg, 1234567
   %B = insertelement <2 x i64> undef, i64 %A, i32 0
@@ -385,8 +385,7 @@ define <4 x float> @PR31296(ptr %in) {
 ;
 ; X64-LABEL: PR31296:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl (%rdi), %eax
-; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
 ; X64-NEXT:    retq
 entry: