[SPARC] Add tail call support for 64-bit target

Extend SPARC tail call support, first introduced in D51206 (commit 1c235c3754),
to also cover the 64-bit target.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D138741
This commit is contained in:
Koakuma 2022-11-26 23:29:05 -05:00 committed by Brad Smith
parent 4191fda69c
commit f63a19baf0
3 changed files with 226 additions and 104 deletions

View File

@ -770,7 +770,10 @@ bool SparcTargetLowering::IsEligibleForTailCallOptimization(
return false;
// Do not tail call opt if the stack is used to pass parameters.
if (CCInfo.getNextStackOffset() != 0)
// 64-bit targets have a slightly higher limit since the ABI requires some
// space to be allocated even when all the parameters fit inside registers.
unsigned StackOffsetLimit = Subtarget->is64Bit() ? 48 : 0;
if (CCInfo.getNextStackOffset() > StackOffsetLimit)
return false;
// Do not tail call opt if either the callee or caller returns
@ -1189,20 +1192,21 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SDValue Chain = CLI.Chain;
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Sparc target does not yet support tail call optimization.
CLI.IsTailCall = false;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
CLI.IsTailCall = CLI.IsTailCall && IsEligibleForTailCallOptimization(
CCInfo, CLI, DAG.getMachineFunction());
// Get the size of the outgoing arguments stack space requirement.
// The stack offset computed by CC_Sparc64 includes all arguments.
// Called functions expect 6 argument words to exist in the stack frame, used
// or not.
unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
unsigned StackReserved = 6 * 8u;
unsigned ArgsSize = std::max(StackReserved, CCInfo.getNextStackOffset());
// Keep stack frames 16-byte aligned.
ArgsSize = alignTo(ArgsSize, 16);
@ -1211,10 +1215,13 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
if (CLI.IsVarArg)
fixupVariableFloatArgs(ArgLocs, CLI.Outs);
assert(!CLI.IsTailCall || ArgsSize == StackReserved);
// Adjust the stack pointer to make room for the arguments.
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
// with more than 6 arguments.
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
if (!CLI.IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
// Collect the set of registers to pass to the function and their values.
// This will be emitted as a sequence of CopyToReg nodes glued to the call
@ -1274,10 +1281,16 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
DAG.getLoad(MVT::i64, DL, Store, HiPtrOff, MachinePointerInfo());
SDValue Lo64 =
DAG.getLoad(MVT::i64, DL, Store, LoPtrOff, MachinePointerInfo());
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()),
Hi64));
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()+1),
Lo64));
Register HiReg = VA.getLocReg();
Register LoReg = VA.getLocReg() + 1;
if (!CLI.IsTailCall) {
HiReg = toCallerWindow(HiReg);
LoReg = toCallerWindow(LoReg);
}
RegsToPass.push_back(std::make_pair(HiReg, Hi64));
RegsToPass.push_back(std::make_pair(LoReg, Lo64));
continue;
}
@ -1298,7 +1311,11 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
++i;
}
}
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
Register Reg = VA.getLocReg();
if (!CLI.IsTailCall)
Reg = toCallerWindow(Reg);
RegsToPass.push_back(std::make_pair(Reg, Arg));
continue;
}
@ -1366,6 +1383,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(InGlue);
// Now the call itself.
if (CLI.IsTailCall) {
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
return DAG.getNode(SPISD::TAIL_CALL, DL, MVT::Other, Ops);
}
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops);
InGlue = Chain.getValue(1);

View File

@ -37,11 +37,9 @@ declare void @bar(...)
; V8-NEXT: mov %g1, %o7
; V9-LABEL: test_tail_call_with_return
; V9: save %sp
; V9: call foo
; V9-NEXT: nop
; V9: ret
; V9-NEXT: restore %g0, %o0, %o0
; V9: mov %o7, %g1
; V9-NEXT: call foo
; V9-NEXT: mov %g1, %o7
define i32 @test_tail_call_with_return() nounwind {
entry:

View File

@ -1,46 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefix=V8
; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefix=V9
define i32 @simple_leaf(i32 %i) #0 {
; CHECK-LABEL: simple_leaf:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: mov %o7, %g1
; CHECK-NEXT: call foo
; CHECK-NEXT: mov %g1, %o7
; V8-LABEL: simple_leaf:
; V8: ! %bb.0: ! %entry
; V8-NEXT: mov %o7, %g1
; V8-NEXT: call foo
; V8-NEXT: mov %g1, %o7
;
; V9-LABEL: simple_leaf:
; V9: ! %bb.0: ! %entry
; V9-NEXT: mov %o7, %g1
; V9-NEXT: call foo
; V9-NEXT: mov %g1, %o7
entry:
%call = tail call i32 @foo(i32 %i)
ret i32 %call
}
define i32 @simple_standard(i32 %i) #1 {
; CHECK-LABEL: simple_standard:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: save %sp, -96, %sp
; CHECK-NEXT: call foo
; CHECK-NEXT: restore
; V8-LABEL: simple_standard:
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -96, %sp
; V8-NEXT: call foo
; V8-NEXT: restore
;
; V9-LABEL: simple_standard:
; V9: ! %bb.0: ! %entry
; V9-NEXT: save %sp, -128, %sp
; V9-NEXT: call foo
; V9-NEXT: restore
entry:
%call = tail call i32 @foo(i32 %i)
ret i32 %call
}
define i32 @extra_arg_leaf(i32 %i) #0 {
; CHECK-LABEL: extra_arg_leaf:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: mov 12, %o1
; CHECK-NEXT: mov %o7, %g1
; CHECK-NEXT: call foo2
; CHECK-NEXT: mov %g1, %o7
; V8-LABEL: extra_arg_leaf:
; V8: ! %bb.0: ! %entry
; V8-NEXT: mov 12, %o1
; V8-NEXT: mov %o7, %g1
; V8-NEXT: call foo2
; V8-NEXT: mov %g1, %o7
;
; V9-LABEL: extra_arg_leaf:
; V9: ! %bb.0: ! %entry
; V9-NEXT: mov 12, %o1
; V9-NEXT: mov %o7, %g1
; V9-NEXT: call foo2
; V9-NEXT: mov %g1, %o7
entry:
%call = tail call i32 @foo2(i32 %i, i32 12)
ret i32 %call
}
define i32 @extra_arg_standard(i32 %i) #1 {
; CHECK-LABEL: extra_arg_standard:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: save %sp, -96, %sp
; CHECK-NEXT: call foo2
; CHECK-NEXT: restore %g0, 12, %o1
; V8-LABEL: extra_arg_standard:
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -96, %sp
; V8-NEXT: call foo2
; V8-NEXT: restore %g0, 12, %o1
;
; V9-LABEL: extra_arg_standard:
; V9: ! %bb.0: ! %entry
; V9-NEXT: save %sp, -128, %sp
; V9-NEXT: call foo2
; V9-NEXT: restore %g0, 12, %o1
entry:
%call = tail call i32 @foo2(i32 %i, i32 12)
ret i32 %call
@ -49,17 +75,31 @@ entry:
; Perform tail call optimization for external symbol.
define void @caller_extern(i8* %src) optsize #0 {
; CHECK-LABEL: caller_extern:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: sethi %hi(dest), %o1
; CHECK-NEXT: add %o1, %lo(dest), %o1
; CHECK-NEXT: mov 7, %o2
; CHECK-NEXT: mov %o0, %o3
; CHECK-NEXT: mov %o1, %o0
; CHECK-NEXT: mov %o3, %o1
; CHECK-NEXT: mov %o7, %g1
; CHECK-NEXT: call memcpy
; CHECK-NEXT: mov %g1, %o7
; V8-LABEL: caller_extern:
; V8: ! %bb.0: ! %entry
; V8-NEXT: sethi %hi(dest), %o1
; V8-NEXT: add %o1, %lo(dest), %o1
; V8-NEXT: mov 7, %o2
; V8-NEXT: mov %o0, %o3
; V8-NEXT: mov %o1, %o0
; V8-NEXT: mov %o3, %o1
; V8-NEXT: mov %o7, %g1
; V8-NEXT: call memcpy
; V8-NEXT: mov %g1, %o7
;
; V9-LABEL: caller_extern:
; V9: ! %bb.0: ! %entry
; V9-NEXT: sethi %h44(dest), %o1
; V9-NEXT: add %o1, %m44(dest), %o1
; V9-NEXT: sllx %o1, 12, %o1
; V9-NEXT: add %o1, %l44(dest), %o1
; V9-NEXT: mov 7, %o2
; V9-NEXT: mov %o0, %o3
; V9-NEXT: mov %o1, %o0
; V9-NEXT: mov %o3, %o1
; V9-NEXT: mov %o7, %g1
; V9-NEXT: call memcpy
; V9-NEXT: mov %g1, %o7
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i32(
i8* getelementptr inbounds ([2 x i8],
@ -71,24 +111,38 @@ entry:
; Perform tail call optimization for function pointer.
define i32 @func_ptr_test(i32 ()* nocapture %func_ptr) #0 {
; CHECK-LABEL: func_ptr_test:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: jmp %o0
; CHECK-NEXT: nop
; V8-LABEL: func_ptr_test:
; V8: ! %bb.0: ! %entry
; V8-NEXT: jmp %o0
; V8-NEXT: nop
;
; V9-LABEL: func_ptr_test:
; V9: ! %bb.0: ! %entry
; V9-NEXT: jmp %o0
; V9-NEXT: nop
entry:
%call = tail call i32 %func_ptr() #1
ret i32 %call
}
define i32 @func_ptr_test2(i32 (i32, i32, i32)* nocapture %func_ptr,
; CHECK-LABEL: func_ptr_test2:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: save %sp, -96, %sp
; CHECK-NEXT: mov 10, %i3
; CHECK-NEXT: mov %i0, %i4
; CHECK-NEXT: mov %i1, %i0
; CHECK-NEXT: jmp %i4
; CHECK-NEXT: restore %g0, %i3, %o1
; V8-LABEL: func_ptr_test2:
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -96, %sp
; V8-NEXT: mov 10, %i3
; V8-NEXT: mov %i0, %i4
; V8-NEXT: mov %i1, %i0
; V8-NEXT: jmp %i4
; V8-NEXT: restore %g0, %i3, %o1
;
; V9-LABEL: func_ptr_test2:
; V9: ! %bb.0: ! %entry
; V9-NEXT: save %sp, -128, %sp
; V9-NEXT: mov 10, %i3
; V9-NEXT: mov %i0, %i4
; V9-NEXT: mov %i1, %i0
; V9-NEXT: jmp %i4
; V9-NEXT: restore %g0, %i3, %o1
i32 %r, i32 %q) #1 {
entry:
%call = tail call i32 %func_ptr(i32 %r, i32 10, i32 %q) #1
@ -99,20 +153,35 @@ entry:
; Do not tail call optimize if stack is used to pass parameters.
define i32 @caller_args() #0 {
; CHECK-LABEL: caller_args:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: save %sp, -104, %sp
; CHECK-NEXT: mov 6, %i0
; CHECK-NEXT: mov %g0, %o0
; CHECK-NEXT: mov 1, %o1
; CHECK-NEXT: mov 2, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: mov 4, %o4
; CHECK-NEXT: mov 5, %o5
; CHECK-NEXT: call foo7
; CHECK-NEXT: st %i0, [%sp+92]
; CHECK-NEXT: ret
; CHECK-NEXT: restore %g0, %o0, %o0
; V8-LABEL: caller_args:
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -104, %sp
; V8-NEXT: mov 6, %i0
; V8-NEXT: mov %g0, %o0
; V8-NEXT: mov 1, %o1
; V8-NEXT: mov 2, %o2
; V8-NEXT: mov 3, %o3
; V8-NEXT: mov 4, %o4
; V8-NEXT: mov 5, %o5
; V8-NEXT: call foo7
; V8-NEXT: st %i0, [%sp+92]
; V8-NEXT: ret
; V8-NEXT: restore %g0, %o0, %o0
;
; V9-LABEL: caller_args:
; V9: ! %bb.0: ! %entry
; V9-NEXT: save %sp, -192, %sp
; V9-NEXT: mov 6, %i0
; V9-NEXT: mov 0, %o0
; V9-NEXT: mov 1, %o1
; V9-NEXT: mov 2, %o2
; V9-NEXT: mov 3, %o3
; V9-NEXT: mov 4, %o4
; V9-NEXT: mov 5, %o5
; V9-NEXT: call foo7
; V9-NEXT: stx %i0, [%sp+2223]
; V9-NEXT: ret
; V9-NEXT: restore %g0, %o0, %o0
entry:
%r = tail call i32 @foo7(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
ret i32 %r
@ -123,15 +192,23 @@ entry:
; byval parameters.
define i32 @caller_byval() #0 {
; CHECK-LABEL: caller_byval:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: save %sp, -104, %sp
; CHECK-NEXT: ld [%fp+-4], %i0
; CHECK-NEXT: st %i0, [%fp+-8]
; CHECK-NEXT: call callee_byval
; CHECK-NEXT: add %fp, -8, %o0
; CHECK-NEXT: ret
; CHECK-NEXT: restore %g0, %o0, %o0
; V8-LABEL: caller_byval:
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -104, %sp
; V8-NEXT: ld [%fp+-4], %i0
; V8-NEXT: st %i0, [%fp+-8]
; V8-NEXT: call callee_byval
; V8-NEXT: add %fp, -8, %o0
; V8-NEXT: ret
; V8-NEXT: restore %g0, %o0, %o0
;
; V9-LABEL: caller_byval:
; V9: ! %bb.0: ! %entry
; V9-NEXT: save %sp, -192, %sp
; V9-NEXT: call callee_byval
; V9-NEXT: add %fp, 2039, %o0
; V9-NEXT: ret
; V9-NEXT: restore %g0, %o0, %o0
entry:
%a = alloca i32*
%r = tail call i32 @callee_byval(i32** byval(i32*) %a)
@ -141,11 +218,17 @@ entry:
; Perform tail call optimization for sret function.
define void @sret_test(%struct.a* noalias sret(%struct.a) %agg.result) #0 {
; CHECK-LABEL: sret_test:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: mov %o7, %g1
; CHECK-NEXT: call sret_func
; CHECK-NEXT: mov %g1, %o7
; V8-LABEL: sret_test:
; V8: ! %bb.0: ! %entry
; V8-NEXT: mov %o7, %g1
; V8-NEXT: call sret_func
; V8-NEXT: mov %g1, %o7
;
; V9-LABEL: sret_test:
; V9: ! %bb.0: ! %entry
; V9-NEXT: mov %o7, %g1
; V9-NEXT: call sret_func
; V9-NEXT: mov %g1, %o7
entry:
tail call void bitcast (void (%struct.a*)* @sret_func to
void (%struct.a*)*)(%struct.a* sret(%struct.a) %agg.result)
@ -157,17 +240,30 @@ entry:
; struct will generate a memcpy as the tail function.
define void @ret_large_struct(%struct.big* noalias sret(%struct.big) %agg.result) #0 {
; CHECK-LABEL: ret_large_struct:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: save %sp, -96, %sp
; CHECK-NEXT: ld [%fp+64], %i0
; CHECK-NEXT: sethi %hi(bigstruct), %i1
; CHECK-NEXT: add %i1, %lo(bigstruct), %o1
; CHECK-NEXT: mov 400, %o2
; CHECK-NEXT: call memcpy
; CHECK-NEXT: mov %i0, %o0
; CHECK-NEXT: jmp %i7+12
; CHECK-NEXT: restore
; V8-LABEL: ret_large_struct:
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -96, %sp
; V8-NEXT: ld [%fp+64], %i0
; V8-NEXT: sethi %hi(bigstruct), %i1
; V8-NEXT: add %i1, %lo(bigstruct), %o1
; V8-NEXT: mov 400, %o2
; V8-NEXT: call memcpy
; V8-NEXT: mov %i0, %o0
; V8-NEXT: jmp %i7+12
; V8-NEXT: restore
;
; V9-LABEL: ret_large_struct:
; V9: ! %bb.0: ! %entry
; V9-NEXT: save %sp, -176, %sp
; V9-NEXT: sethi %h44(bigstruct), %i1
; V9-NEXT: add %i1, %m44(bigstruct), %i1
; V9-NEXT: sllx %i1, 12, %i1
; V9-NEXT: add %i1, %l44(bigstruct), %o1
; V9-NEXT: mov 400, %o2
; V9-NEXT: call memcpy
; V9-NEXT: mov %i0, %o0
; V9-NEXT: ret
; V9-NEXT: restore
entry:
%0 = bitcast %struct.big* %agg.result to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 bitcast (%struct.big* @bigstruct to i8*), i32 400, i1 false)
@ -177,10 +273,17 @@ entry:
; Test register + immediate pattern.
define void @addri_test(i32 %ptr) #0 {
; CHECK-LABEL: addri_test:
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: jmp %o0+4
; CHECK-NEXT: nop
; V8-LABEL: addri_test:
; V8: ! %bb.0: ! %entry
; V8-NEXT: jmp %o0+4
; V8-NEXT: nop
;
; V9-LABEL: addri_test:
; V9: ! %bb.0: ! %entry
; V9-NEXT: add %o0, 4, %o0
; V9-NEXT: srl %o0, 0, %o0
; V9-NEXT: jmp %o0
; V9-NEXT: nop
entry:
%add = add nsw i32 %ptr, 4
%0 = inttoptr i32 %add to void ()*