[VENTUS][feat] Add VentusAlwaysInlinePass

This commit is contained in:
wenhu1024 2025-04-04 19:33:39 +08:00
parent 798cf98a9f
commit f69aa543dd
7 changed files with 339 additions and 17 deletions

View File

@ -48,7 +48,8 @@ add_llvm_target(RISCVCodeGen
GISel/RISCVLegalizerInfo.cpp
GISel/RISCVRegisterBankInfo.cpp
VentusPrintfRuntimeBinding.cpp
VentusAlwaysInlinePass.cpp
LINK_COMPONENTS
Analysis
AsmPrinter

View File

@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
#define LLVM_LIB_TARGET_RISCV_RISCV_H
#include "llvm/Pass.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "llvm/Target/TargetMachine.h"
@ -84,15 +83,26 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVRegisterBankInfo &);
ModulePass *createVentusPrintfRuntimeBinding();
void initializeVentusPrintfRuntimeBindingPass(PassRegistry&);
void initializeVentusPrintfRuntimeBindingPass(PassRegistry &);
extern char &VentusPrintfRuntimeBindingID;
struct VentusPrintfRuntimeBindingPass
: PassInfoMixin<VentusPrintfRuntimeBindingPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
}
/// Creates the legacy pass-manager version of the Ventus always-inline pass.
/// \p GlobalOpt selects whether function aliases whose uses were replaced by
/// their aliasee are also erased from the module.
ModulePass *createVentusAlwaysInlinePass(bool GlobalOpt = true);
/// Initializer for the legacy Ventus always-inline pass; called with the
/// global pass registry from LLVMInitializeRISCVTarget().
void initializeVentusAlwaysInlinePass(PassRegistry &Registry);
/// New pass-manager wrapper around the Ventus always-inline transformation.
/// It is registered under the pipeline name "ventus-always-inline".
struct VentusAlwaysInlinePass : public PassInfoMixin<VentusAlwaysInlinePass> {
/// \p GlobalOpt is forwarded to the implementation and controls whether
/// replaced function aliases are erased (defaults to true).
VentusAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
/// Runs the transformation on \p M.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
// Whether replaced function aliases are also removed from the module.
bool GlobalOpt;
};
} // namespace llvm
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
@ -101,19 +111,19 @@ struct VentusPrintfRuntimeBindingPass
/// a separate piece of memory that is unique from other
/// memory locations.
namespace RISCVAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_VENTUS_ADDRESS = 5,
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_VENTUS_ADDRESS = 5,
FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory
CONSTANT_ADDRESS = 4, ///< Address space for constant memory
LOCAL_ADDRESS = 3, ///< Address space for local memory.
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory
CONSTANT_ADDRESS = 4, ///< Address space for constant memory
LOCAL_ADDRESS = 3, ///< Address space for local memory.
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u,
};
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u,
};
}
/// Because there are two stacks in ventus, we need to add a VGPRSpill according

View File

@ -36,9 +36,11 @@
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/Scalar.h"
#include <optional>
using namespace llvm;
static cl::opt<bool> EnableRedundantCopyElimination(
@ -56,6 +58,11 @@ static cl::opt<bool>
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
// Option to inline all early. When set, VentusAlwaysInlinePass is added at the
// pipeline early-simplification extension point (see
// registerPassBuilderCallbacks), right after the printf runtime binding pass.
// Hidden and off by default.
static cl::opt<bool> EarlyInlineAll("ventus-early-inline-all",
cl::desc("Inline all functions early"),
cl::init(false), cl::Hidden);
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@ -68,6 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeVentusPrintfRuntimeBindingPass(*PR);
initializeVentusAlwaysInlinePass(*PR);
}
static StringRef computeDataLayout(const Triple &TT, StringRef CPU) {
@ -75,7 +83,7 @@ static StringRef computeDataLayout(const Triple &TT, StringRef CPU) {
// return "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
// "-v256:256-v512:512-v1024:1024-n32:64-S128-A5-G1";
bool IsRV32 = TT.isRISCV32();
if(!IsRV32)
if (!IsRV32)
return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128-A5-G1";
assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
return "e-m:e-p:32:32-i64:64-n32-S128-A5-G1";
@ -145,12 +153,19 @@ void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(VentusPrintfRuntimeBindingPass());
return true;
}
if (PassName == "ventus-always-inline") {
PM.addPass(VentusAlwaysInlinePass());
return true;
}
return false;
});
PB.registerPipelineEarlySimplificationEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
PM.addPass(VentusPrintfRuntimeBindingPass());
if (EarlyInlineAll)
PM.addPass(VentusAlwaysInlinePass());
});
}

View File

@ -0,0 +1,156 @@
//===-- VentusAlwaysInlinePass.cpp - Force Function Inlining --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
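/// This pass marks functions for inlining in Ventus code. Specifically:
/// 1. Non-kernel functions that reach LOCAL memory (addrspace(3)) globals,
///    directly or through their callees, lose any noinline attribute and are
///    marked always_inline
/// 2. Under stress-calls mode (-ventus-stress-function-calls), the remaining
///    used non-kernel function definitions are marked noinline, unless they
///    already carry always_inline
/// 3. Otherwise, used non-kernel function definitions are marked
///    always_inline, unless they already carry noinline
/// 4. Function aliases are replaced with their targets and optionally removed
///    (on riscv32 only aliases with internal linkage are handled)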
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
#define VENTUS_ALWAYS_INLINE "Ventus Inline All Functions"
#define DEBUG_TYPE "ventus-always-inline"
namespace {

/// Testing knob. When set, used non-kernel function definitions are marked
/// noinline instead of always_inline. Functions that reach LOCAL address space
/// globals are still forced to always_inline, and functions that already carry
/// always_inline are left alone.
static cl::opt<bool> StressCalls("ventus-stress-function-calls", cl::Hidden,
                                 cl::desc("Force all functions to be noinline"),
                                 cl::init(false));

/// Legacy pass-manager wrapper around the always-inline marking logic.
class VentusAlwaysInline : public ModulePass {
public:
  /// Legacy pass identifier.
  static char ID;

  /// \p GlobalOpt controls whether replaced function aliases are erased.
  VentusAlwaysInline(bool GlobalOpt = false)
      : ModulePass(ID), GlobalOpt(GlobalOpt) {}

  StringRef getPassName() const override { return VENTUS_ALWAYS_INLINE; }

  /// The pass requires no analyses and declares all of them preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }

  bool runOnModule(Module &M) override;

private:
  /// Whether aliases whose uses were replaced are also removed.
  bool GlobalOpt;
};

} // End anonymous namespace
// Storage for the legacy pass identifier declared in VentusAlwaysInline; it is
// passed to the ModulePass base-class constructor.
char VentusAlwaysInline::ID = 0;
/// Follows every transitive user of \p GV (constants, constant expressions,
/// other globals, instructions) and collects the non-kernel functions that
/// contain such a use into \p FuncsToAlwaysInline.
///
/// Each collected function has its noinline attribute removed and is itself
/// pushed back on the worklist, so that its callers (and any other users of
/// the function) are visited as well. Functions using the VENTUS_KERNEL
/// calling convention are never collected and terminate the walk.
static void
recursivelyVisitUsers(GlobalValue &GV,
                      SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallPtrSet<const Value *, 8> Seen;
  SmallVector<User *, 16> Worklist(GV.users());

  while (!Worklist.empty()) {
    User *Cur = Worklist.pop_back_val();

    // Cycles are possible (recursive calls, self-referencing globals), so
    // every user is processed at most once.
    const bool FirstVisit = Seen.insert(Cur).second;
    if (!FirstVisit)
      continue;

    auto *Inst = dyn_cast<Instruction>(Cur);
    if (!Inst) {
      // Not an instruction yet: keep following the users of this value until
      // an instruction inside some function is reached.
      append_range(Worklist, Cur->users());
      continue;
    }

    Function *Parent = Inst->getFunction();
    if (Parent->getCallingConv() == CallingConv::VENTUS_KERNEL)
      continue;

    Parent->removeFnAttr(Attribute::NoInline);
    FuncsToAlwaysInline.insert(Parent);
    // Visit the users of the function too, so callers get the same treatment.
    Worklist.push_back(Parent);
  }
}
/// Shared implementation of the legacy and new pass-manager passes.
///
/// Steps, in order:
///  1. Replace uses of function aliases with the aliased function (on riscv32
///     only aliases with internal linkage) and, when \p GlobalOpt is set,
///     erase those aliases.
///  2. Force always_inline on every non-kernel function that reaches a global
///     in the LOCAL address space.
///  3. Mark the remaining used, non-kernel function definitions always_inline,
///     or noinline when -ventus-stress-function-calls is given, skipping
///     functions that already carry the opposite attribute.
///
/// \returns true if the module was modified. This includes alias rewriting and
/// alias removal, not only attribute changes.
static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  std::vector<GlobalAlias *> AliasesToRemove;
  bool Changed = false;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;
  Triple TT(M.getTargetTriple());

  for (GlobalAlias &A : M.aliases()) {
    if (Function *F = dyn_cast<Function>(A.getAliasee())) {
      if (TT.getArch() == Triple::riscv32 &&
          A.getLinkage() != GlobalValue::InternalLinkage)
        continue;
      // Rewriting at least one use is an IR change that must be reported even
      // if no inline attribute ends up being touched below.
      Changed |= !A.use_empty();
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    // Erasure is deferred to here so the alias list is not mutated while it is
    // being iterated above.
    Changed |= !AliasesToRemove.empty();
    for (GlobalAlias *A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Every non-kernel function that reaches a LOCAL address space global is
  // collected for always_inline.
  for (GlobalVariable &GV : M.globals()) {
    unsigned AS = GV.getAddressSpace();
    if (AS == RISCVAS::LOCAL_ADDRESS) {
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
    }
  }

  // A function that already carries the attribute opposite to the one about
  // to be applied is left untouched.
  auto IncompatAttr =
      StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

  for (Function &F : M) {
    if (!F.isDeclaration() && !F.use_empty() &&
        !F.hasFnAttribute(IncompatAttr) &&
        F.getCallingConv() != CallingConv::VENTUS_KERNEL) {
      if (StressCalls) {
        // Functions already forced to always_inline by the LOCAL-memory walk
        // keep that decision even in stress mode.
        if (!FuncsToAlwaysInline.count(&F))
          FuncsToNoInline.insert(&F);
      } else
        FuncsToAlwaysInline.insert(&F);
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
/// Legacy pass-manager entry point: runs the shared implementation on \p M and
/// reports whether it modified the module.
bool VentusAlwaysInline::runOnModule(Module &M) {
  const bool Modified = alwaysInlineImpl(M, GlobalOpt);
  return Modified;
}
// Legacy pass registration: command-line name "ventus-always-inline"
// (DEBUG_TYPE uses the same string), descriptive name VENTUS_ALWAYS_INLINE.
// The two trailing flags are "CFG only" and "is analysis", both false.
INITIALIZE_PASS(VentusAlwaysInline, "ventus-always-inline",
VENTUS_ALWAYS_INLINE, false, false)
namespace llvm {

/// Factory for the legacy pass-manager pass. \p GlobalOpt controls whether
/// function aliases replaced by their aliasee are also erased. The caller (the
/// pass manager) takes ownership of the returned pass.
ModulePass *createVentusAlwaysInlinePass(bool GlobalOpt) {
  return new VentusAlwaysInline(GlobalOpt);
}

/// New pass-manager entry point.
///
/// \returns PreservedAnalyses::all() when the implementation reports that the
/// module was left untouched, and PreservedAnalyses::none() otherwise, because
/// the pass may add or remove inline attributes and rewrite or erase aliases.
PreservedAnalyses VentusAlwaysInlinePass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  if (!alwaysInlineImpl(M, GlobalOpt))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

} // end namespace llvm

View File

@ -0,0 +1,89 @@
; RUN: opt -S -mtriple=riscv32 -passes=ventus-always-inline %s | FileCheck --check-prefix=ALL %s
; RUN: opt -S -mtriple=riscv32 -ventus-stress-function-calls -passes=ventus-always-inline %s | FileCheck --check-prefix=ALL %s
; Every non-kernel function below reaches an addrspace(3) (LOCAL) global, so it
; is expected to end up with attribute group #0 (alwaysinline) both with and
; without -ventus-stress-function-calls. The same expectations are used for
; both invocations.
@local0 = addrspace(3) global i32 undef, align 4
@local1 = addrspace(3) global [512 x i32] undef, align 4
; A non-LOCAL global whose initializer refers to a LOCAL global.
@nested.local.address = addrspace(1) global ptr addrspace(3) @local0, align 4
; An alias of a LOCAL global.
@alias.local0 = alias i32, ptr addrspace(3) @local0
; A LOCAL global whose initializer refers to itself (user cycle).
@local.cycle = addrspace(3) global i32 ptrtoint (ptr addrspace(3) @local.cycle to i32), align 4
; ALL-LABEL: define i32 @load_local_simple() #0 {
define i32 @load_local_simple() {
%load = load i32, ptr addrspace(3) @local0, align 4
ret i32 %load
}
; ALL-LABEL: define i32 @load_local_const_gep() #0 {
define i32 @load_local_const_gep() {
%load = load i32, ptr addrspace(3) getelementptr inbounds ([512 x i32], ptr addrspace(3) @local1, i64 0, i64 4), align 4
ret i32 %load
}
; ALL-LABEL: define i32 @load_local_var_gep(i32 %idx) #0 {
define i32 @load_local_var_gep(i32 %idx) {
%gep = getelementptr inbounds [512 x i32], ptr addrspace(3) @local1, i32 0, i32 %idx
%load = load i32, ptr addrspace(3) %gep, align 4
ret i32 %load
}
; ALL-LABEL: define ptr addrspace(3) @load_nested_address(i32 %idx) #0 {
define ptr addrspace(3) @load_nested_address(i32 %idx) {
%load = load ptr addrspace(3), ptr addrspace(1) @nested.local.address, align 4
ret ptr addrspace(3) %load
}
; ALL-LABEL: define i32 @load_local_alias() #0 {
define i32 @load_local_alias() {
%load = load i32, ptr addrspace(3) @alias.local0, align 4
ret i32 %load
}
; ALL-LABEL: define i32 @load_local_cycle() #0 {
define i32 @load_local_cycle() {
%load = load i32, ptr addrspace(3) @local.cycle, align 4
ret i32 %load
}
; ALL-LABEL: define i1 @icmp_local_address() #0 {
define i1 @icmp_local_address() {
ret i1 icmp eq (ptr addrspace(3) @local0, ptr addrspace(3) null)
}
; The caller of a function that touches LOCAL memory is marked as well.
; ALL-LABEL: define i32 @transitive_call() #0 {
define i32 @transitive_call() {
%call = call i32 @load_local_simple()
ret i32 %call
}
; ALL-LABEL: define i32 @recursive_call_local(i32 %arg0) #0 {
define i32 @recursive_call_local(i32 %arg0) {
%load = load i32, ptr addrspace(3) @local0, align 4
%add = add i32 %arg0, %load
%call = call i32 @recursive_call_local(i32 %add)
ret i32 %call
}
; An existing noinline attribute is expected to be replaced by alwaysinline.
; ALL-LABEL: define i32 @load_local_simple_noinline() #0 {
define i32 @load_local_simple_noinline() noinline {
%load = load i32, ptr addrspace(3) @local0, align 4
ret i32 %load
}
; ALL-LABEL: define i32 @recursive_call_local_noinline(i32 %arg0) #0 {
define i32 @recursive_call_local_noinline(i32 %arg0) noinline {
%load = load i32, ptr addrspace(3) @local0, align 4
%add = add i32 %arg0, %load
%call = call i32 @recursive_call_local(i32 %add)
ret i32 %call
}
; Kernel entry point: the pass skips functions with the ventus_kernel calling
; convention, so only the start of the signature is matched here.
; ALL-LABEL: define ventus_kernel void @kernel_with_local_access(
define ventus_kernel void @kernel_with_local_access(ptr addrspace(1) %out) {
%load = load i32, ptr addrspace(3) @local0, align 4
store i32 %load, ptr addrspace(1) %out, align 4
ret void
}
; ALL: attributes #0 = { alwaysinline }

View File

@ -0,0 +1,18 @@
; RUN: opt -S -mtriple=riscv32 -passes=ventus-always-inline %s | FileCheck %s
; Alias handling on riscv32: a function alias with internal linkage is replaced
; by its aliasee and removed (the new pass-manager pass defaults to
; GlobalOpt = true), while an alias with any other linkage is left in place.
@internal_alias = internal alias i32 (i32), ptr @original_function
@public_alias = alias i32 (i32), ptr @original_function
define i32 @original_function(i32 %x) {
%result = add i32 %x, 42
ret i32 %result
}
define i32 @call_public() {
%res = call i32 @public_alias(i32 7)
ret i32 %res
}
; The internal alias must not appear between the function definition and the
; surviving use of the public alias.
; CHECK: define i32 @original_function
; CHECK-NOT: @internal_alias
; CHECK: @public_alias

View File

@ -0,0 +1,33 @@
; RUN: opt -mtriple=riscv32 -O1 -S -inline-threshold=1 -ventus-early-inline-all %s | FileCheck %s
; With -ventus-early-inline-all the Ventus always-inline pass is added at the
; early-simplification point of the -O1 pipeline. The callee is therefore
; expected to be inlined into both kernels despite -inline-threshold=1, also
; when it is called through an alias.
@c_alias = dso_local alias i32 (i32), ptr @callee
define dso_local i32 @callee(i32 %x) {
entry:
%mul1 = mul i32 %x, %x
%mul2 = mul i32 %mul1, %x
%mul3 = mul i32 %mul1, %mul2
%mul4 = mul i32 %mul3, %mul2
%mul5 = mul i32 %mul4, %mul3
ret i32 %mul5
}
; Direct call from a kernel: the multiply chain appears inline, no call remains.
; CHECK-LABEL: @caller
; CHECK: mul i32
; CHECK-NOT: call i32
define ventus_kernel void @caller(i32 %x) {
entry:
%res = call i32 @callee(i32 %x)
store volatile i32 %res, ptr addrspace(1) undef
ret void
}
; Call through the alias: no call is expected to remain either.
; CHECK-LABEL: @alias_caller(
; CHECK-NOT: call
define ventus_kernel void @alias_caller(i32 %x) {
entry:
%res = call i32 @c_alias(i32 %x)
store volatile i32 %res, ptr addrspace(1) undef
ret void
}