[X86] Use `lock add/sub/or/and/xor` for cases where we only care about the EFLAGS (negated cases)

This fixes #58685

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D138428
Phoebe Wang 2022-11-23 09:05:59 +08:00
parent 8f0cd7c1d0
commit 7218103bca
2 changed files with 59 additions and 98 deletions
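
For context, the "negated cases" are the inverted flag predicates: `icmp ne` (SETNE, ZF == 0) and `icmp sgt x, -1` (SETNS, SF == 0). Previously only `icmp eq` and `icmp slt x, 0` were folded into a flag-setting `lock` RMW instruction. A minimal before/after sketch in LLVM IR (function and value names are illustrative, not taken from the test file):

    define i1 @sub_is_nonzero(ptr %p, i32 %x) nounwind {
      %old = atomicrmw sub ptr %p, i32 %x seq_cst, align 4
      %ne = icmp ne i32 %old, %x   ; old != x  <=>  (old - x) != 0  <=>  ZF == 0
      ret i1 %ne
    }

Before this patch the `atomicrmw` result is materialized with `negl` + `lock xaddl` + `cmpl` before the `setne`; afterwards this compiles to just `lock subl %esi, (%rdi)` followed by `setne %al`, since only EFLAGS is consumed.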

llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -31421,36 +31421,47 @@ static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
   AtomicRMWInst::BinOp Opc = AI->getOperation();
   if (Opc == AtomicRMWInst::Add) {
     if (match(I, m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value())))
-      return Pred == CmpInst::ICMP_EQ;
-    if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value()))) &&
-        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_SLT;
+      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
+    if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value())))) {
+      if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+        return Pred == CmpInst::ICMP_SLT;
+      if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
+        return Pred == CmpInst::ICMP_SGT;
+    }
     return false;
   }
   if (Opc == AtomicRMWInst::Sub) {
     if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
-      return Pred == CmpInst::ICMP_EQ;
-    if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op)))) &&
-        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_SLT;
+      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
+    if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op))))) {
+      if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+        return Pred == CmpInst::ICMP_SLT;
+      if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
+        return Pred == CmpInst::ICMP_SGT;
+    }
     return false;
   }
-  if (Opc == AtomicRMWInst::Or) {
-    if (match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))) &&
-        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
-  }
-  if (Opc == AtomicRMWInst::And) {
-    if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))) &&
-        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+  if ((Opc == AtomicRMWInst::Or &&
+       match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value())))) ||
+      (Opc == AtomicRMWInst::And &&
+       match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))))) {
+    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
+             Pred == CmpInst::ICMP_SLT;
+    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
+      return Pred == CmpInst::ICMP_SGT;
+    return false;
   }
   if (Opc == AtomicRMWInst::Xor) {
     if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
-      return Pred == CmpInst::ICMP_EQ;
-    if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))) &&
-        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_SLT;
+      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
+    if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value())))) {
+      if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+        return Pred == CmpInst::ICMP_SLT;
+      if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))
+        return Pred == CmpInst::ICMP_SGT;
+    }
     return false;
   }
   return false;
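
The predicate sets mirror what the flag result of a `lock` RMW instruction can answer: comparing the recomputed result against zero gives ZF (eq/ne) and SF (slt), while `icmp sgt %res, -1` is the canonical IR spelling of "sign bit clear", which is why the new code matches `m_AllOnes()` and accepts ICMP_SGT. A sketch of that pattern in IR (names illustrative):

      %old = atomicrmw xor ptr %p, i32 %x seq_cst, align 4
      %res = xor i32 %old, %x        ; single-use recompute of the stored value
      %sgt = icmp sgt i32 %res, -1   ; res > -1  <=>  res >= 0  <=>  SF == 0

The `m_OneUse` guards ensure the recomputed value feeds only the compare, so replacing it with the instruction's EFLAGS output is safe.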
@@ -31467,10 +31478,24 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
     assert(TempI->hasOneUse() && "Must have one use");
     ICI = cast<ICmpInst>(TempI->user_back());
   }
+  X86::CondCode CC = X86::COND_INVALID;
   ICmpInst::Predicate Pred = ICI->getPredicate();
-  assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT) &&
-         "Not supported Pred");
-  X86::CondCode CC = Pred == CmpInst::ICMP_EQ ? X86::COND_E : X86::COND_S;
+  switch (Pred) {
+  default:
+    llvm_unreachable("Not supported Pred");
+  case CmpInst::ICMP_EQ:
+    CC = X86::COND_E;
+    break;
+  case CmpInst::ICMP_NE:
+    CC = X86::COND_NE;
+    break;
+  case CmpInst::ICMP_SLT:
+    CC = X86::COND_S;
+    break;
+  case CmpInst::ICMP_SGT:
+    CC = X86::COND_NS;
+    break;
+  }
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
   switch (AI->getOperation()) {
   default:
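
The switch extends the old EQ/SLT ternary to the full mapping from the supported predicates to x86 condition codes; COND_NE and COND_NS are exactly the negations of COND_E and COND_S:

    ICMP_EQ  -> X86::COND_E   (sete,  ZF == 1)
    ICMP_NE  -> X86::COND_NE  (setne, ZF == 0)
    ICMP_SLT -> X86::COND_S   (sets,  SF == 1)
    ICMP_SGT -> X86::COND_NS  (setns, SF == 0; the operand was compared against -1)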

llvm/test/CodeGen/X86 lit test (filename not preserved in this view)

@@ -122,9 +122,7 @@ define i1 @lock_xor_sets(ptr %0, i32 %1) nounwind {
 define i1 @lock_add_setne(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_add_setne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    lock xaddl %eax, (%rdi)
-; CHECK-NEXT:    addl %esi, %eax
+; CHECK-NEXT:    lock addl %esi, (%rdi)
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw add ptr %0, i32 %1 seq_cst, align 4
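
The hunk context stops at the `atomicrmw`; based on the `m_Sub(m_ZeroInt(), m_Specific(Op))` matcher above, the rest of this test body is plausibly the following (a reconstruction, as the tail is not shown in this view):

      %3 = atomicrmw add ptr %0, i32 %1 seq_cst, align 4
      %4 = sub i32 0, %1
      %5 = icmp ne i32 %3, %4    ; old != -x  <=>  (old + x) != 0
      ret i1 %5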
@@ -136,9 +134,7 @@ define i1 @lock_add_setne(ptr %0, i32 %1) nounwind {
 define i1 @lock_add_setns(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_add_setns:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    lock xaddl %eax, (%rdi)
-; CHECK-NEXT:    addl %esi, %eax
+; CHECK-NEXT:    lock addl %esi, (%rdi)
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw add ptr %0, i32 %1 seq_cst, align 4
@@ -150,10 +146,7 @@ define i1 @lock_add_setns(ptr %0, i32 %1) nounwind {
 define i1 @lock_sub_setne(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_sub_setne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    negl %eax
-; CHECK-NEXT:    lock xaddl %eax, (%rdi)
-; CHECK-NEXT:    cmpl %esi, %eax
+; CHECK-NEXT:    lock subl %esi, (%rdi)
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw sub ptr %0, i32 %1 seq_cst, align 4
@@ -164,10 +157,7 @@ define i1 @lock_sub_setne(ptr %0, i32 %1) nounwind {
 define i1 @lock_sub_setns(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_sub_setns:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    negl %eax
-; CHECK-NEXT:    lock xaddl %eax, (%rdi)
-; CHECK-NEXT:    cmpl %esi, %eax
+; CHECK-NEXT:    lock subl %esi, (%rdi)
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw sub ptr %0, i32 %1 seq_cst, align 4
@@ -179,16 +169,7 @@ define i1 @lock_sub_setns(ptr %0, i32 %1) nounwind {
 define i1 @lock_or_setne(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_or_setne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB14_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    orl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB14_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    lock orl %esi, (%rdi)
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
@@ -200,16 +181,7 @@ define i1 @lock_or_setne(ptr %0, i32 %1) nounwind {
 define i1 @lock_or_setns(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_or_setns:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB15_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    orl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB15_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    lock orl %esi, (%rdi)
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
@@ -221,16 +193,7 @@ define i1 @lock_or_setns(ptr %0, i32 %1) nounwind {
 define i1 @lock_and_setne(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_and_setne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB16_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB16_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    lock andl %esi, (%rdi)
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
@@ -242,16 +205,7 @@ define i1 @lock_and_setne(ptr %0, i32 %1) nounwind {
 define i1 @lock_and_setns(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_and_setns:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB17_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB17_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    lock andl %esi, (%rdi)
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
@@ -263,16 +217,7 @@ define i1 @lock_and_setns(ptr %0, i32 %1) nounwind {
 define i1 @lock_xor_setne(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_xor_setne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB18_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    xorl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB18_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    cmpl %esi, %eax
+; CHECK-NEXT:    lock xorl %esi, (%rdi)
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
@@ -283,16 +228,7 @@ define i1 @lock_xor_setne(ptr %0, i32 %1) nounwind {
 define i1 @lock_xor_setns(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_xor_setns:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB19_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    xorl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB19_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    lock xorl %esi, (%rdi)
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
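
The CHECK lines above are in the style produced by llvm/utils/update_llc_test_checks.py. Assuming a build tree in build/ (and substituting the elided test path), something like the following regenerates them after a codegen change:

    python3 llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc llvm/test/CodeGen/X86/<this-test>.ll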