[VENTUS][RISCV][feat] Add vararg backend support in ventus
We changed the stack growth direction some months ago for OpenCL. To stay compatible with the current architecture, some modifications are needed to support varargs.
This commit is contained in:
parent
b01963690d
commit
f28e6c5e38
|
@ -615,8 +615,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||
return Offset;
|
||||
}
|
||||
|
||||
assert(StackID == TargetStackID::Default &&
|
||||
"SGPRSpill stack should not reach here!");
|
||||
// assert(StackID == TargetStackID::Default &&
|
||||
// "SGPRSpill stack should not reach here!");
|
||||
|
||||
if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
|
||||
assert(0 && "TODO: Add stack realignment support for Ventus?");
|
||||
|
@ -649,8 +649,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||
}
|
||||
|
||||
if (FrameReg == getFPReg(STI)) {
|
||||
assert(0 && "TODO: Add fp support for Ventus?");
|
||||
Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
|
||||
// assert(0 && "TODO: Add fp support for Ventus?");
|
||||
Offset -= StackOffset::getFixed(RVFI->getVarArgsSaveSize());
|
||||
if (FI >= 0)
|
||||
Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
|
||||
// When using FP to access scalable vector objects, we need to minus
|
||||
|
@ -685,7 +685,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||
// |--------------------------| -- <-- SP
|
||||
//
|
||||
if (MFI.isFixedObjectIndex(FI)) {
|
||||
assert(0 && "TODO!");
|
||||
// assert(0 && "TODO!");
|
||||
assert(!RI->hasStackRealignment(MF) &&
|
||||
"Can't index across variable sized realign");
|
||||
Offset -= StackOffset::get(MFI.getStackSize() +
|
||||
|
|
|
@ -5371,6 +5371,12 @@ static const MCPhysReg ArgVGPRs[] = {
|
|||
RISCV::V30, RISCV::V31
|
||||
};
|
||||
|
||||
// Registers used for variadic functions: the vector registers v0-v7.
// LowerFormalArguments looks up the first of these that is still unallocated
// and copies that register and the ones after it to the vararg save area.
|
||||
static const MCPhysReg VarArgVGPRs[] = {
|
||||
RISCV::V0, RISCV::V1, RISCV::V2, RISCV::V3,
|
||||
RISCV::V4, RISCV::V5, RISCV::V6, RISCV::V7
|
||||
};
|
||||
|
||||
// Pass a 2*XLEN argument that has been split into two XLEN values through
|
||||
// registers or the stack as necessary.
|
||||
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
|
||||
|
@ -5794,17 +5800,19 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
|||
}
|
||||
|
||||
if (IsVarArg) {
|
||||
assert(0 && "TODO: VarArg lowering is not finished!");
|
||||
ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgVGPRs);
|
||||
// When it comes to variadic arguments, the variadic function also needs to
|
||||
// follow the non-kernel function calling convention, so we need to use VGPRs
|
||||
// to pass arguments; here we use the v0-v7 registers.
|
||||
ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(VarArgVGPRs);
|
||||
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
|
||||
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
|
||||
const TargetRegisterClass *RC = &RISCV::VGPRRegClass;
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
MachineRegisterInfo &RegInfo = MF.getRegInfo();
|
||||
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
|
||||
|
||||
// Offset of the first variable argument from stack pointer, and size of
|
||||
// the vararg save area. For now, the varargs save area is either zero or
|
||||
// large enough to hold a0-a7.
|
||||
// large enough to hold v0-v7.
|
||||
int VaArgOffset, VarArgsSaveSize;
|
||||
|
||||
// If all registers are allocated, then all varargs must be passed on the
|
||||
|
@ -5813,8 +5821,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
|||
VaArgOffset = CCInfo.getNextStackOffset();
|
||||
VarArgsSaveSize = 0;
|
||||
} else {
|
||||
// Size of the save area needed for the argument registers left unallocated
|
||||
VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
|
||||
VaArgOffset = -VarArgsSaveSize;
|
||||
VaArgOffset = VarArgsSaveSize;
|
||||
}
|
||||
|
||||
// Record the frame index of the first variable argument
|
||||
|
@ -5833,7 +5842,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
|||
// Copy the integer registers that may have been used for passing varargs
|
||||
// to the vararg save area.
|
||||
for (unsigned I = Idx; I < ArgRegs.size();
|
||||
++I, VaArgOffset += XLenInBytes) {
|
||||
++I, VaArgOffset -= XLenInBytes) {
|
||||
// Because the stack growth direction was changed for Ventus, successive
|
||||
// registers are stored at decreasing offsets
|
||||
const Register Reg = RegInfo.createVirtualRegister(RC);
|
||||
RegInfo.addLiveIn(ArgRegs[I], Reg);
|
||||
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
|
||||
target triple = "riscv32"
|
||||
|
||||
@MAX_FORMAT_STR_SIZE = dso_local global i32 64, align 4
|
||||
@PRINT_BUFFER_ADDR = dso_local global ptr inttoptr (i32 -1878900736 to ptr), align 4
|
||||
|
||||
; Minimal variadic printf used to exercise vararg lowering.
; It calls llvm.va_start, reads the first variadic argument as an i32 and
; returns that value. In between, a loop runs while %i is less than the value
; loaded from MAX_FORMAT_STR_SIZE; each iteration copies one byte from %fmt to
; the address held in PRINT_BUFFER_ADDR and advances both pointers by one.
; The autogenerated checks below expect v1-v7 to be stored at offsets -20 to
; -44 from tp before the loop.
; Function Attrs: noinline nounwind optnone vscale_range(1,2048)
|
||||
define dso_local i32 @printf(ptr noundef %fmt, ...) {
|
||||
; VENTUS-LABEL: printf:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 48
|
||||
; VENTUS-NEXT: addi tp, tp, 48
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 48
|
||||
; VENTUS-NEXT: li a0, 0
|
||||
; VENTUS-NEXT: vsw.v v7, -44(tp)
|
||||
; VENTUS-NEXT: vsw.v v6, -40(tp)
|
||||
; VENTUS-NEXT: vsw.v v5, -36(tp)
|
||||
; VENTUS-NEXT: vsw.v v4, -32(tp)
|
||||
; VENTUS-NEXT: vsw.v v3, -28(tp)
|
||||
; VENTUS-NEXT: vsw.v v2, -24(tp)
|
||||
; VENTUS-NEXT: vsw.v v1, -20(tp)
|
||||
; VENTUS-NEXT: addi a1, tp, -16
|
||||
; VENTUS-NEXT: sw a1, -16(tp)
|
||||
; VENTUS-NEXT: vmv.x.s a1, v1
|
||||
; VENTUS-NEXT: lui a2, %hi(MAX_FORMAT_STR_SIZE)
|
||||
; VENTUS-NEXT: lui a3, %hi(PRINT_BUFFER_ADDR)
|
||||
; VENTUS-NEXT: lw a4, %lo(MAX_FORMAT_STR_SIZE)(a2)
|
||||
; VENTUS-NEXT: bge a0, a4, .LBB0_2
|
||||
; VENTUS-NEXT: .LBB0_1: # %for.body
|
||||
; VENTUS-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; VENTUS-NEXT: # kill: def $v1 killed $x10
|
||||
; VENTUS-NEXT: vadd.vx v1, v0, a0
|
||||
; VENTUS-NEXT: vmv.x.s a4, v1
|
||||
; VENTUS-NEXT: lw a5, %lo(PRINT_BUFFER_ADDR)(a3)
|
||||
; VENTUS-NEXT: lb a4, 0(a4)
|
||||
; VENTUS-NEXT: addi a6, a5, 1
|
||||
; VENTUS-NEXT: sw a6, %lo(PRINT_BUFFER_ADDR)(a3)
|
||||
; VENTUS-NEXT: sb a4, 0(a5)
|
||||
; VENTUS-NEXT: addi a0, a0, 1
|
||||
; VENTUS-NEXT: lw a4, %lo(MAX_FORMAT_STR_SIZE)(a2)
|
||||
; VENTUS-NEXT: blt a0, a4, .LBB0_1
|
||||
; VENTUS-NEXT: .LBB0_2: # %for.end
|
||||
; VENTUS-NEXT: vmv.v.x v0, a1
|
||||
; VENTUS-NEXT: addi sp, sp, -48
|
||||
; VENTUS-NEXT: addi tp, tp, -48
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%fmt.addr = alloca ptr, align 4
|
||||
%va = alloca ptr, align 4
|
||||
%v = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store ptr %fmt, ptr %fmt.addr, align 4
|
||||
call void @llvm.va_start(ptr %va)
|
||||
; Fetch the first variadic argument (an i32) and advance the va pointer by 4.
%argp.cur = load ptr, ptr %va, align 4
|
||||
%argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
|
||||
store ptr %argp.next, ptr %va, align 4
|
||||
%0 = load i32, ptr %argp.cur, align 4
|
||||
store i32 %0, ptr %v, align 4
|
||||
store i32 0, ptr %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%1 = load i32, ptr %i, align 4
|
||||
%2 = load i32, ptr @MAX_FORMAT_STR_SIZE, align 4
|
||||
%cmp = icmp slt i32 %1, %2
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
; Copy one byte from the format pointer to the print buffer pointer,
; post-incrementing both stored pointers.
%3 = load ptr, ptr %fmt.addr, align 4
|
||||
%incdec.ptr = getelementptr inbounds i8, ptr %3, i32 1
|
||||
store ptr %incdec.ptr, ptr %fmt.addr, align 4
|
||||
%4 = load i8, ptr %3, align 1
|
||||
%5 = load ptr, ptr @PRINT_BUFFER_ADDR, align 4
|
||||
%incdec.ptr1 = getelementptr inbounds i8, ptr %5, i32 1
|
||||
store ptr %incdec.ptr1, ptr @PRINT_BUFFER_ADDR, align 4
|
||||
store i8 %4, ptr %5, align 1
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%6 = load i32, ptr %i, align 4
|
||||
%inc = add nsw i32 %6, 1
|
||||
store i32 %inc, ptr %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
call void @llvm.va_end(ptr %va)
|
||||
; Return the first variadic argument saved in %v.
%7 = load i32, ptr %v, align 4
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||
declare void @llvm.va_start(ptr)
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||
declare void @llvm.va_end(ptr)
|
Loading…
Reference in New Issue