[AArch64][SME] Always allocate a lazy-save buffer if a function has ZA state.

We already do this for most cases, with the exception of instructions that
get expanded to function calls (e.g. for lowering operations on fp128
values), in which case we temporarily allocate a lazy-save buffer.

The code that is generated in this case is, however, incorrect, as it seems
to pass an incorrect address for the TPIDR2 object to the ZA restore
function. By always allocating the lazy-save buffer once, we avoid this
issue entirely.

The cost is that we also allocate such a buffer when it is not
needed. We could fix that in a follow-up patch, where we remove the
lazy-save buffer when it isn't used.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D138208
This commit is contained in:
Sander de Smalen 2022-11-21 16:01:54 +00:00
parent ae7a3e1c1d
commit 3f9d64a2ad
3 changed files with 6 additions and 24 deletions

View File

@ -6013,21 +6013,6 @@ AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
}
/// Returns true if the Function has ZA state and contains at least one call to
/// a function that requires setting up a lazy-save buffer.
static bool requiresBufferForLazySave(const Function &F) {
SMEAttrs CallerAttrs(F);
if (!CallerAttrs.hasZAState())
return false;
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
if (const CallInst *Call = dyn_cast<CallInst>(&I))
if (CallerAttrs.requiresLazySave(SMEAttrs(*Call)))
return true;
return false;
}
unsigned
AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
SelectionDAG &DAG) const {
@ -6443,8 +6428,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
if (requiresBufferForLazySave(MF.getFunction())) {
// Set up a buffer once and store the buffer in the MachineFunctionInfo.
// Conservatively assume the function requires the lazy-save mechanism.
if (SMEAttrs(MF.getFunction()).hasZAState()) {
unsigned TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG);
FuncInfo->setLazySaveTPIDR2Obj(TPIDR2Obj);
}
@ -7041,9 +7026,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N);
unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
if (!TPIDR2Obj)
TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG);
MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));

View File

@ -310,12 +310,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
; CHECK-COMMON-NEXT: sub x9, x9, x8
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: sub x10, x29, #16
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: add x0, x29, #0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
; CHECK-COMMON-NEXT: // %bb.1:

View File

@ -48,12 +48,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
; CHECK-NEXT: sub x9, x9, x8
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x10, x29, #16
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: smstart za
; CHECK-NEXT: add x0, x29, #0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1: