diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 6e1dff9f8b40..c699971579a7 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -615,8 +615,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
     return Offset;
   }
 
-  assert(StackID == TargetStackID::Default &&
-         "SGPRSpill stack should not reach here!");
+  // assert(StackID == TargetStackID::Default &&
+  //        "SGPRSpill stack should not reach here!");
 
   if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
     assert(0 && "TODO: Add stack realignment support for Ventus?");
@@ -649,8 +649,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   }
 
   if (FrameReg == getFPReg(STI)) {
-    assert(0 && "TODO: Add fp support for Ventus?");
-    Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
+    // assert(0 && "TODO: Add fp support for Ventus?");
+    Offset -= StackOffset::getFixed(RVFI->getVarArgsSaveSize());
     if (FI >= 0)
       Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
     // When using FP to access scalable vector objects, we need to minus
@@ -685,7 +685,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   // |--------------------------| -- <-- SP
   //
   if (MFI.isFixedObjectIndex(FI)) {
-    assert(0 && "TODO!");
+    // assert(0 && "TODO!");
     assert(!RI->hasStackRealignment(MF) &&
            "Can't index across variable sized realign");
     Offset -= StackOffset::get(MFI.getStackSize() +
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 00f183e0d38a..9a3f24d2d97b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5371,6 +5371,12 @@ static const MCPhysReg ArgVGPRs[] = {
   RISCV::V30, RISCV::V31
 };
 
+// Registers used for passing variadic arguments.
+static const MCPhysReg VarArgVGPRs[] = {
+  RISCV::V0, RISCV::V1, RISCV::V2, RISCV::V3,
+  RISCV::V4, RISCV::V5, RISCV::V6, RISCV::V7
+};
+
 // Pass a 2*XLEN argument that has been split into two XLEN values through
 // registers or the stack as necessary.
 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -5794,17 +5800,19 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   }
 
   if (IsVarArg) {
-    assert(0 && "TODO: VarArg lowering is not finished!");
-    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgVGPRs);
+    // When it comes to variadic arguments, a variadic function also needs to
+    // follow the non-kernel calling convention, so VGPRs are used to pass its
+    // arguments; here we use registers v0-v7.
+    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(VarArgVGPRs);
     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
-    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+    const TargetRegisterClass *RC = &RISCV::VGPRRegClass;
     MachineFrameInfo &MFI = MF.getFrameInfo();
     MachineRegisterInfo &RegInfo = MF.getRegInfo();
     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
 
     // Offset of the first variable argument from stack pointer, and size of
     // the vararg save area. For now, the varargs save area is either zero or
-    // large enough to hold a0-a7.
+    // large enough to hold v0-v7.
     int VaArgOffset, VarArgsSaveSize;
 
     // If all registers are allocated, then all varargs must be passed on the
@@ -5813,8 +5821,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
       VaArgOffset = CCInfo.getNextStackOffset();
       VarArgsSaveSize = 0;
     } else {
+      // Size of the save area for the registers that are left unallocated.
       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
-      VaArgOffset = -VarArgsSaveSize;
+      VaArgOffset = VarArgsSaveSize;
     }
 
     // Record the frame index of the first variable argument
@@ -5833,7 +5842,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
     // Copy the integer registers that may have been used for passing varargs
     // to the vararg save area.
     for (unsigned I = Idx; I < ArgRegs.size();
-         ++I, VaArgOffset += XLenInBytes) {
+         ++I, VaArgOffset -= XLenInBytes) {
+      // Since the stack grows downwards, the offset is decremented for each
+      // register that is saved.
       const Register Reg = RegInfo.createVirtualRegister(RC);
       RegInfo.addLiveIn(ArgRegs[I], Reg);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll
new file mode 100644
index 000000000000..0d3d469ecbc3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=VENTUS %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
+target triple = "riscv32"
+
+@MAX_FORMAT_STR_SIZE = dso_local global i32 64, align 4
+@PRINT_BUFFER_ADDR = dso_local global ptr inttoptr (i32 -1878900736 to ptr), align 4
+
+; Function Attrs: noinline nounwind optnone vscale_range(1,2048)
+define dso_local i32 @printf(ptr noundef %fmt, ...) {
+; VENTUS-LABEL: printf:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    addi sp, sp, 48
+; VENTUS-NEXT:    addi tp, tp, 48
+; VENTUS-NEXT:    .cfi_def_cfa_offset 48
+; VENTUS-NEXT:    li a0, 0
+; VENTUS-NEXT:    vsw.v v7, -44(tp)
+; VENTUS-NEXT:    vsw.v v6, -40(tp)
+; VENTUS-NEXT:    vsw.v v5, -36(tp)
+; VENTUS-NEXT:    vsw.v v4, -32(tp)
+; VENTUS-NEXT:    vsw.v v3, -28(tp)
+; VENTUS-NEXT:    vsw.v v2, -24(tp)
+; VENTUS-NEXT:    vsw.v v1, -20(tp)
+; VENTUS-NEXT:    addi a1, tp, -16
+; VENTUS-NEXT:    sw a1, -16(tp)
+; VENTUS-NEXT:    vmv.x.s a1, v1
+; VENTUS-NEXT:    lui a2, %hi(MAX_FORMAT_STR_SIZE)
+; VENTUS-NEXT:    lui a3, %hi(PRINT_BUFFER_ADDR)
+; VENTUS-NEXT:    lw a4, %lo(MAX_FORMAT_STR_SIZE)(a2)
+; VENTUS-NEXT:    bge a0, a4, .LBB0_2
+; VENTUS-NEXT:  .LBB0_1: # %for.body
+; VENTUS-NEXT:    # =>This Inner Loop Header: Depth=1
+; VENTUS-NEXT:    # kill: def $v1 killed $x10
+; VENTUS-NEXT:    vadd.vx v1, v0, a0
+; VENTUS-NEXT:    vmv.x.s a4, v1
+; VENTUS-NEXT:    lw a5, %lo(PRINT_BUFFER_ADDR)(a3)
+; VENTUS-NEXT:    lb a4, 0(a4)
+; VENTUS-NEXT:    addi a6, a5, 1
+; VENTUS-NEXT:    sw a6, %lo(PRINT_BUFFER_ADDR)(a3)
+; VENTUS-NEXT:    sb a4, 0(a5)
+; VENTUS-NEXT:    addi a0, a0, 1
+; VENTUS-NEXT:    lw a4, %lo(MAX_FORMAT_STR_SIZE)(a2)
+; VENTUS-NEXT:    blt a0, a4, .LBB0_1
+; VENTUS-NEXT:  .LBB0_2: # %for.end
+; VENTUS-NEXT:    vmv.v.x v0, a1
+; VENTUS-NEXT:    addi sp, sp, -48
+; VENTUS-NEXT:    addi tp, tp, -48
+; VENTUS-NEXT:    ret
+entry:
+  %fmt.addr = alloca ptr, align 4
+  %va = alloca ptr, align 4
+  %v = alloca i32, align 4
+  %i = alloca i32, align 4
+  store ptr %fmt, ptr %fmt.addr, align 4
+  call void @llvm.va_start(ptr %va)
+  %argp.cur = load ptr, ptr %va, align 4
+  %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+  store ptr %argp.next, ptr %va, align 4
+  %0 = load i32, ptr %argp.cur, align 4
+  store i32 %0, ptr %v, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32, ptr %i, align 4
+  %2 = load i32, ptr @MAX_FORMAT_STR_SIZE, align 4
+  %cmp = icmp slt i32 %1, %2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %3 = load ptr, ptr %fmt.addr, align 4
+  %incdec.ptr = getelementptr inbounds i8, ptr %3, i32 1
+  store ptr %incdec.ptr, ptr %fmt.addr, align 4
+  %4 = load i8, ptr %3, align 1
+  %5 = load ptr, ptr @PRINT_BUFFER_ADDR, align 4
+  %incdec.ptr1 = getelementptr inbounds i8, ptr %5, i32 1
+  store ptr %incdec.ptr1, ptr @PRINT_BUFFER_ADDR, align 4
+  store i8 %4, ptr %5, align 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %6 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %6, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  call void @llvm.va_end(ptr %va)
+  %7 = load i32, ptr %v, align 4
+  ret i32 %7
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare void @llvm.va_start(ptr)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare void @llvm.va_end(ptr)
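
Note on the offset changes: below is a minimal standalone C++ sketch (not part of the patch) of the vararg save-area arithmetic that LowerFormalArguments performs after this change. The constants XLenInBytes, NumArgRegs, and Idx are illustrative assumptions (riscv32, the eight VarArgVGPRs v0-v7, and one register, v0, already consumed by the fixed %fmt argument); in the compiler, Idx comes from CCInfo.getFirstUnallocated(ArgRegs), and the tp-relative offsets seen in the test output are assigned later by frame lowering.

// Sketch of the save-area arithmetic, under the assumptions stated above.
#include <cstdio>

int main() {
  const int XLenInBytes = 4; // XLEN = 32 on riscv32
  const int NumArgRegs = 8;  // v0-v7 (VarArgVGPRs)
  const int Idx = 1;         // registers already used by fixed arguments

  // Save-area size for the registers left unallocated (v1-v7 here).
  int VarArgsSaveSize = XLenInBytes * (NumArgRegs - Idx);

  // The patch starts at +VarArgsSaveSize and decrements per register,
  // because the stack grows downwards.
  int VaArgOffset = VarArgsSaveSize;
  for (int I = Idx; I < NumArgRegs; ++I, VaArgOffset -= XLenInBytes)
    std::printf("v%d -> fixed-object offset %d\n", I, VaArgOffset);

  std::printf("VarArgsSaveSize = %d\n", VarArgsSaveSize);
  return 0;
}

With Idx = 1 this prints offsets 28 down to 4 for v1-v7, i.e. the unallocated argument registers get contiguous XLEN-sized slots laid out top-down, which is the layout the new comments in the loop describe.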