[X86] Fold scalar_to_vector(i64 zext(x)) -> bitcast(vzext_movl(scalar_to_vector(i32 x)))

Extends the existing any_extend fold to take advantage of the implicit zero-extension of the movd instruction.

This also helps replace some nasty xmm->gpr->xmm traffic with a shuffle pattern instead.

Noticed while looking at D130953
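
As a rough sketch of the effect (distilled from the bitcast_crash and vec_set style tests below; the IR and function name here are illustrative, not one of the committed tests):

    define <2 x i64> @zext_scalar(i32 %arg) {
      %z = zext i32 %arg to i64
      %v = insertelement <2 x i64> undef, i64 %z, i32 0
      ret <2 x i64> %v
    }

This previously zero-extended in a GPR and then moved the result back into the vector unit:

    movl %edi, %eax
    movq %rax, %xmm0

whereas the fold now lets the implicit zero-extension of movd do the work in a single instruction:

    movd %edi, %xmm0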
Simon Pilgrim 2022-10-21 10:39:57 +01:00
parent daf067da04
commit 5ca7754144
10 changed files with 120 additions and 111 deletions


@@ -55036,25 +55036,37 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
                        Src.getOperand(1));
 
-  // Reduce v2i64 to v4i32 if we don't need the upper bits.
+  // Reduce v2i64 to v4i32 if we don't need the upper bits or are known zero.
   // TODO: Move to DAGCombine/SimplifyDemandedBits?
-  if (VT == MVT::v2i64 || VT == MVT::v2f64) {
-    auto IsAnyExt64 = [](SDValue Op) {
-      if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
+  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
+    auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
+      if (Op.getValueType() != MVT::i64)
         return SDValue();
-      if (Op.getOpcode() == ISD::ANY_EXTEND &&
+      unsigned Opc = IsZeroExt ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND;
+      if (Op.getOpcode() == Opc &&
           Op.getOperand(0).getScalarValueSizeInBits() <= 32)
         return Op.getOperand(0);
+      unsigned Ext = IsZeroExt ? ISD::ZEXTLOAD : ISD::EXTLOAD;
       if (auto *Ld = dyn_cast<LoadSDNode>(Op))
-        if (Ld->getExtensionType() == ISD::EXTLOAD &&
+        if (Ld->getExtensionType() == Ext &&
            Ld->getMemoryVT().getScalarSizeInBits() <= 32)
           return Op;
+      if (IsZeroExt && DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32)))
+        return Op;
       return SDValue();
     };
-    if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
+    if (SDValue AnyExt = IsExt64(peekThroughOneUseBitcasts(Src), false))
       return DAG.getBitcast(
           VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
-                          DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
+                          DAG.getAnyExtOrTrunc(AnyExt, DL, MVT::i32)));
+    if (SDValue ZeroExt = IsExt64(peekThroughOneUseBitcasts(Src), true))
+      return DAG.getBitcast(
+          VT,
+          DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v4i32,
+                      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+                                  DAG.getZExtOrTrunc(ZeroExt, DL, MVT::i32))));
   }
 
   // Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
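
The DAG.MaskedValueIsZero clause also catches i64 values that have no explicit zero_extend node but whose upper 32 bits are provably zero, e.g. (a sketch matching the vec_set style test near the end of this diff; the function name is illustrative):

    define <2 x i64> @masked(i64 %arg) {
      %A = and i64 %arg, 1234567
      %B = insertelement <2 x i64> undef, i64 %A, i32 0
      ret <2 x i64> %B
    }

which can now select movd %edi, %xmm0 in place of movq %rdi, %xmm0.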


@@ -198,17 +198,25 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 }
 define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE-LABEL: extract0_i32_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract0_i32_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT: retq
 %e = extractelement <4 x i32> %x, i32 0
@@ -242,24 +250,18 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
 ; SSE2-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: psrlq $32, %xmm0
 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $1, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
-; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract1_i32_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
 ; AVX-NEXT: retq
 %e = extractelement <4 x i32> %x, i32 1
 %z = zext i32 %e to i64
@@ -330,25 +332,15 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
 }
 define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE2-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $3, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
-; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE41-NEXT: retq
+; SSE-LABEL: extract3_i32_zext_insert1_i64_zero:
+; SSE: # %bb.0:
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: extract3_i32_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vextractps $3, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT: retq
 %e = extractelement <4 x i32> %x, i32 3
@@ -538,17 +530,22 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract0_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract0_i16_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
 ; AVX-NEXT: retq
 %e = extractelement <8 x i16> %x, i32 0
@@ -581,18 +578,21 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract1_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $1, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract1_i16_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $1, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT: retq
 %e = extractelement <8 x i16> %x, i32 1
 %z = zext i16 %e to i64
@@ -628,18 +628,21 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract2_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $2, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract2_i16_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $2, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT: retq
 %e = extractelement <8 x i16> %x, i32 2
 %z = zext i16 %e to i64
@@ -674,18 +677,20 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 }
 define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract3_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $3, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: psrlq $48, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: extract3_i16_zext_insert1_i64_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $3, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT: retq
 %e = extractelement <8 x i16> %x, i32 3
 %z = zext i16 %e to i64


@@ -332,8 +332,7 @@ declare dso_local i32 @foo(i32, i32, i32, i32)
 define <8 x i32> @PR49658_zext(ptr %ptr, i32 %mul) {
 ; SSE-LABEL: PR49658_zext:
 ; SSE: # %bb.0: # %start
-; SSE-NEXT: movl %esi, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %esi, %xmm0
 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
 ; SSE-NEXT: pxor %xmm0, %xmm0
 ; SSE-NEXT: movq $-2097152, %rax # imm = 0xFFE00000


@@ -15,8 +15,7 @@
 define <8 x i16> @bitcast_crash(i32 %arg, <8 x i16> %x, i1 %c) {
 ; CHECK-LABEL: bitcast_crash:
 ; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movd %edi, %xmm1
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; CHECK-NEXT: testb $1, %sil
 ; CHECK-NEXT: je .LBB0_2


@@ -149,11 +149,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; LIN-SSE2-NEXT: andl %ecx, %edx
 ; LIN-SSE2-NEXT: andl %ecx, %esi
 ; LIN-SSE2-NEXT: andl %ecx, %edi
-; LIN-SSE2-NEXT: movq %rax, %xmm0
-; LIN-SSE2-NEXT: movq %rdx, %xmm1
+; LIN-SSE2-NEXT: movd %eax, %xmm0
+; LIN-SSE2-NEXT: movd %edx, %xmm1
 ; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE2-NEXT: movq %rdi, %xmm2
-; LIN-SSE2-NEXT: movq %rsi, %xmm1
+; LIN-SSE2-NEXT: movd %edi, %xmm2
+; LIN-SSE2-NEXT: movd %esi, %xmm1
 ; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; LIN-SSE2-NEXT: retq
 ;
@@ -169,11 +169,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; LIN-SSE4-NEXT: andl %ecx, %edx
 ; LIN-SSE4-NEXT: andl %ecx, %esi
 ; LIN-SSE4-NEXT: andl %ecx, %edi
-; LIN-SSE4-NEXT: movq %rdx, %xmm1
-; LIN-SSE4-NEXT: movq %rax, %xmm0
+; LIN-SSE4-NEXT: movd %edx, %xmm1
+; LIN-SSE4-NEXT: movd %eax, %xmm0
 ; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE4-NEXT: movq %rdi, %xmm2
-; LIN-SSE4-NEXT: movq %rsi, %xmm1
+; LIN-SSE4-NEXT: movd %edi, %xmm2
+; LIN-SSE4-NEXT: movd %esi, %xmm1
 ; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; LIN-SSE4-NEXT: retq
 ;
@@ -192,11 +192,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; WIN-SSE2-NEXT: andl %r9d, %ecx
 ; WIN-SSE2-NEXT: andl %r9d, %edx
 ; WIN-SSE2-NEXT: andl %r9d, %r8d
-; WIN-SSE2-NEXT: movq %rax, %xmm0
-; WIN-SSE2-NEXT: movq %rcx, %xmm1
+; WIN-SSE2-NEXT: movd %eax, %xmm0
+; WIN-SSE2-NEXT: movd %ecx, %xmm1
 ; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE2-NEXT: movq %r8, %xmm2
-; WIN-SSE2-NEXT: movq %rdx, %xmm1
+; WIN-SSE2-NEXT: movd %r8d, %xmm2
+; WIN-SSE2-NEXT: movd %edx, %xmm1
 ; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; WIN-SSE2-NEXT: retq
 ;
@@ -212,11 +212,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
 ; WIN-SSE4-NEXT: andl %r9d, %ecx
 ; WIN-SSE4-NEXT: andl %r9d, %edx
 ; WIN-SSE4-NEXT: andl %r9d, %r8d
-; WIN-SSE4-NEXT: movq %rcx, %xmm1
-; WIN-SSE4-NEXT: movq %rax, %xmm0
+; WIN-SSE4-NEXT: movd %ecx, %xmm1
+; WIN-SSE4-NEXT: movd %eax, %xmm0
 ; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE4-NEXT: movq %r8, %xmm2
-; WIN-SSE4-NEXT: movq %rdx, %xmm1
+; WIN-SSE4-NEXT: movd %r8d, %xmm2
+; WIN-SSE4-NEXT: movd %edx, %xmm1
 ; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; WIN-SSE4-NEXT: retq
 ;


@@ -1101,8 +1101,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: movapd %xmm0, %xmm2
 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT: movl %edi, %eax
-; SSE41-NEXT: movq %rax, %xmm0
+; SSE41-NEXT: movd %edi, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -1112,8 +1111,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
 ; AVX1-LABEL: arg_f64_v2f64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT: movl %edi, %eax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovd %edi, %xmm2
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
@@ -1458,8 +1456,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: movapd %xmm0, %xmm1
 ; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT: movl %esi, %eax
-; SSE41-NEXT: movq %rax, %xmm0
+; SSE41-NEXT: movd %esi, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
@@ -1469,8 +1466,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
 ; AVX1-LABEL: load_f64_v2f64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT: movl %esi, %eax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovd %esi, %xmm2
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0


@@ -255,7 +255,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; SSE-NEXT: movzbl (%rdi), %ecx
 ; SSE-NEXT: movl $42, %eax
 ; SSE-NEXT: shrq %cl, %rax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: lshr_op0_constant:
@@ -263,7 +263,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; AVX-NEXT: movzbl (%rdi), %ecx
 ; AVX-NEXT: movl $42, %eax
 ; AVX-NEXT: shrq %cl, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
 ; AVX-NEXT: retq
 %x = load i64, ptr %p
 %b = lshr i64 42, %x


@@ -14,8 +14,7 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
 ;
 ; X64-LABEL: mmx_movzl:
 ; X64: ## %bb.0:
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movaps {{.*#+}} xmm0 = [32,0,0,0]
 ; X64-NEXT: retq
 %tmp = bitcast x86_mmx %x to <2 x i32>
 %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0


@@ -38,7 +38,7 @@ define <2 x i64> @test2(i64 %arg) nounwind {
 ; X64-LABEL: test2:
 ; X64: # %bb.0:
 ; X64-NEXT: andl $1234567, %edi # imm = 0x12D687
-; X64-NEXT: movq %rdi, %xmm0
+; X64-NEXT: movd %edi, %xmm0
 ; X64-NEXT: retq
 %A = and i64 %arg, 1234567
 %B = insertelement <2 x i64> undef, i64 %A, i32 0


@@ -385,8 +385,7 @@ define <4 x float> @PR31296(ptr %in) {
 ;
 ; X64-LABEL: PR31296:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
 ; X64-NEXT: retq
 entry: