[VENTUS][feat] Add VentusAlwaysInlinePass
This commit is contained in:
parent
798cf98a9f
commit
f69aa543dd
|
@ -48,7 +48,8 @@ add_llvm_target(RISCVCodeGen
|
|||
GISel/RISCVLegalizerInfo.cpp
|
||||
GISel/RISCVRegisterBankInfo.cpp
|
||||
VentusPrintfRuntimeBinding.cpp
|
||||
|
||||
VentusAlwaysInlinePass.cpp
|
||||
|
||||
LINK_COMPONENTS
|
||||
Analysis
|
||||
AsmPrinter
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
|
||||
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
|
||||
#define LLVM_LIB_TARGET_RISCV_RISCV_H
|
||||
|
||||
#include "llvm/Pass.h"
|
||||
#include "MCTargetDesc/RISCVBaseInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
@ -84,15 +83,26 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
|
|||
RISCVRegisterBankInfo &);
|
||||
|
||||
ModulePass *createVentusPrintfRuntimeBinding();
|
||||
void initializeVentusPrintfRuntimeBindingPass(PassRegistry&);
|
||||
void initializeVentusPrintfRuntimeBindingPass(PassRegistry &);
|
||||
extern char &VentusPrintfRuntimeBindingID;
|
||||
|
||||
struct VentusPrintfRuntimeBindingPass
|
||||
: PassInfoMixin<VentusPrintfRuntimeBindingPass> {
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
|
||||
};
|
||||
}
|
||||
|
||||
/// Creates the ModulePass (legacy pass manager) form of the Ventus
/// always-inline pass. When \p GlobalOpt is true, function aliases that the
/// pass replaces with their aliasees are also erased from the module.
ModulePass *createVentusAlwaysInlinePass(bool GlobalOpt = true);
|
||||
/// Initializer for the legacy VentusAlwaysInline pass; invoked from
/// LLVMInitializeRISCVTarget with the global pass registry.
void initializeVentusAlwaysInlinePass(PassRegistry &Registry);
|
||||
|
||||
/// New pass-manager form of the Ventus always-inline transformation.
///
/// The \c GlobalOpt flag is forwarded to the shared implementation: when it is
/// set, function aliases that were replaced by their aliasees are erased from
/// the module as well.
struct VentusAlwaysInlinePass : PassInfoMixin<VentusAlwaysInlinePass> {
  VentusAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}

  /// Applies the inlining attributes to the functions of \p M.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

private:
  /// Whether replaced function aliases are also removed from the module.
  bool GlobalOpt;
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
/// OpenCL uses address spaces to differentiate between
|
||||
/// various memory regions on the hardware. On the CPU
|
||||
|
@ -101,19 +111,19 @@ struct VentusPrintfRuntimeBindingPass
|
|||
/// a separate piece of memory that is unique from other
|
||||
/// memory locations.
|
||||
namespace RISCVAS {
|
||||
enum : unsigned {
|
||||
// The maximum value for flat, generic, local, private, constant and region.
|
||||
MAX_VENTUS_ADDRESS = 5,
|
||||
enum : unsigned {
|
||||
// The maximum value for flat, generic, local, private, constant and region.
|
||||
MAX_VENTUS_ADDRESS = 5,
|
||||
|
||||
FLAT_ADDRESS = 0, ///< Address space for flat memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory
|
||||
CONSTANT_ADDRESS = 4, ///< Address space for constant memory
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
|
||||
FLAT_ADDRESS = 0, ///< Address space for flat memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory
|
||||
CONSTANT_ADDRESS = 4, ///< Address space for constant memory
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
|
||||
|
||||
// Some places use this if the address space can't be determined.
|
||||
UNKNOWN_ADDRESS_SPACE = ~0u,
|
||||
};
|
||||
// Some places use this if the address space can't be determined.
|
||||
UNKNOWN_ADDRESS_SPACE = ~0u,
|
||||
};
|
||||
}
|
||||
|
||||
/// Because there are two stacks in ventus, we need to add a VGPRSpill according
|
||||
|
|
|
@ -36,9 +36,11 @@
|
|||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/IPO/AlwaysInliner.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include <optional>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> EnableRedundantCopyElimination(
|
||||
|
@ -56,6 +58,11 @@ static cl::opt<bool>
|
|||
cl::desc("Enable the machine combiner pass"),
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
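// Option to run VentusAlwaysInlinePass at the start of the optimization
// pipeline so that non-kernel functions are force-inlined early.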
|
||||
static cl::opt<bool> EarlyInlineAll("ventus-early-inline-all",
|
||||
cl::desc("Inline all functions early"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
|
||||
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
|
||||
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
|
||||
|
@ -68,6 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
|
|||
initializeRISCVPreRAExpandPseudoPass(*PR);
|
||||
initializeRISCVExpandPseudoPass(*PR);
|
||||
initializeVentusPrintfRuntimeBindingPass(*PR);
|
||||
initializeVentusAlwaysInlinePass(*PR);
|
||||
}
|
||||
|
||||
static StringRef computeDataLayout(const Triple &TT, StringRef CPU) {
|
||||
|
@ -75,7 +83,7 @@ static StringRef computeDataLayout(const Triple &TT, StringRef CPU) {
|
|||
// return "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
|
||||
// "-v256:256-v512:512-v1024:1024-n32:64-S128-A5-G1";
|
||||
bool IsRV32 = TT.isRISCV32();
|
||||
if(!IsRV32)
|
||||
if (!IsRV32)
|
||||
return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128-A5-G1";
|
||||
assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
|
||||
return "e-m:e-p:32:32-i64:64-n32-S128-A5-G1";
|
||||
|
@ -145,12 +153,19 @@ void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
|
|||
PM.addPass(VentusPrintfRuntimeBindingPass());
|
||||
return true;
|
||||
}
|
||||
if (PassName == "ventus-always-inline") {
|
||||
PM.addPass(VentusAlwaysInlinePass());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
PB.registerPipelineEarlySimplificationEPCallback(
|
||||
[this](ModulePassManager &PM, OptimizationLevel Level) {
|
||||
PM.addPass(VentusPrintfRuntimeBindingPass());
|
||||
|
||||
if (EarlyInlineAll)
|
||||
PM.addPass(VentusAlwaysInlinePass());
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,156 @@
|
|||
//===-- VentusAlwaysInlinePass.cpp - Force Function Inlining --------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
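/// This pass marks functions for inlining in Ventus code. Specifically:
/// 1. Non-kernel functions that (transitively) use a LOCAL memory
///    (addrspace(3)) global are marked always_inline; an existing noinline
///    attribute on them is removed.
/// 2. Under -ventus-stress-function-calls, the remaining used non-kernel
///    function definitions are marked noinline unless already always_inline.
/// 3. Otherwise, used non-kernel function definitions are marked always_inline
///    unless they are noinline.
/// 4. Aliases of functions are replaced with their aliasees (on riscv32 only
///    aliases with internal linkage) and, when GlobalOpt is set, erased.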
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "RISCV.h"
|
||||
#include "RISCVSubtarget.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define VENTUS_ALWAYS_INLINE "Ventus Inline All Functions"
|
||||
#define DEBUG_TYPE "ventus-always-inline"
|
||||
|
||||
namespace {
|
||||
|
||||
static cl::opt<bool> StressCalls("ventus-stress-function-calls", cl::Hidden,
|
||||
cl::desc("Force all functions to be noinline"),
|
||||
cl::init(false));
|
||||
|
||||
class VentusAlwaysInline : public ModulePass {
|
||||
bool GlobalOpt;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
VentusAlwaysInline(bool GlobalOpt = false)
|
||||
: ModulePass(ID), GlobalOpt(GlobalOpt) {}
|
||||
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
StringRef getPassName() const override { return VENTUS_ALWAYS_INLINE; }
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char VentusAlwaysInline::ID = 0;
|
||||
|
||||
static void
|
||||
recursivelyVisitUsers(GlobalValue &GV,
|
||||
SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
|
||||
SmallVector<User *, 16> Stack(GV.users());
|
||||
SmallPtrSet<const Value *, 8> Visited;
|
||||
|
||||
while (!Stack.empty()) {
|
||||
User *U = Stack.pop_back_val();
|
||||
if (!Visited.insert(U).second)
|
||||
continue;
|
||||
|
||||
if (Instruction *I = dyn_cast<Instruction>(U)) {
|
||||
Function *F = I->getParent()->getParent();
|
||||
if (F->getCallingConv() != CallingConv::VENTUS_KERNEL) {
|
||||
F->removeFnAttr(Attribute::NoInline);
|
||||
FuncsToAlwaysInline.insert(F);
|
||||
Stack.push_back(F);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
append_range(Stack, U->users());
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies the Ventus inlining policy to \p M.
///
/// Steps, in order:
///  1. Every alias whose aliasee is a function has its uses replaced by that
///     function (on riscv32 only aliases with internal linkage). If
///     \p GlobalOpt is set, the replaced aliases are erased afterwards.
///  2. Every non-kernel function that transitively uses a global variable in
///     the LOCAL address space is scheduled for always_inline.
///  3. Every other used, defined, non-kernel function is scheduled for
///     noinline (when StressCalls is set) or always_inline (otherwise),
///     unless it already carries the opposite attribute.
///
/// \returns true if the module was modified, including modifications that
/// only touched aliases.
static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  bool Changed = false;

  SmallVector<GlobalAlias *, 4> AliasesToRemove;
  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  const Triple TT(M.getTargetTriple());

  for (GlobalAlias &A : M.aliases()) {
    auto *F = dyn_cast<Function>(A.getAliasee());
    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
    if (!F)
      continue;

    if (TT.getArch() == Triple::riscv32 &&
        A.getLinkage() != GlobalValue::InternalLinkage)
      continue;

    // Rewriting at least one use is a modification of the module.
    Changed |= !A.use_empty();
    A.replaceAllUsesWith(F);
    AliasesToRemove.push_back(&A);
  }

  if (GlobalOpt) {
    // Erase only after the iteration over M.aliases() has finished.
    for (GlobalAlias *A : AliasesToRemove) {
      A->eraseFromParent();
      Changed = true;
    }
  }

  for (GlobalVariable &GV : M.globals()) {
    if (GV.getAddressSpace() == RISCVAS::LOCAL_ADDRESS)
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  // A function that already carries the opposite attribute is left alone.
  auto IncompatAttr =
      StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

  for (Function &F : M) {
    if (F.isDeclaration() || F.use_empty() || F.hasFnAttribute(IncompatAttr) ||
        F.getCallingConv() == CallingConv::VENTUS_KERNEL)
      continue;

    if (StressCalls) {
      // Functions using local memory stay always_inline even in stress mode.
      if (!FuncsToAlwaysInline.count(&F))
        FuncsToNoInline.insert(&F);
    } else {
      FuncsToAlwaysInline.insert(&F);
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
|
||||
|
||||
/// Legacy pass-manager entry point: runs the shared implementation on \p M and
/// returns its result.
bool VentusAlwaysInline::runOnModule(Module &M) {
  const bool ModuleChanged = alwaysInlineImpl(M, GlobalOpt);
  return ModuleChanged;
}
|
||||
|
||||
INITIALIZE_PASS(VentusAlwaysInline, "ventus-always-inline",
|
||||
VENTUS_ALWAYS_INLINE, false, false)
|
||||
|
||||
namespace llvm {
|
||||
ModulePass *createVentusAlwaysInlinePass(bool GlobalOpt) {
|
||||
return new VentusAlwaysInline(GlobalOpt);
|
||||
}
|
||||
|
||||
PreservedAnalyses VentusAlwaysInlinePass::run(Module &M,
|
||||
ModuleAnalysisManager &AM) {
|
||||
alwaysInlineImpl(M, GlobalOpt);
|
||||
return PreservedAnalyses::all();
|
||||
}
|
||||
} // end namespace llvm
|
|
@ -0,0 +1,89 @@
|
|||
; RUN: opt -S -mtriple=riscv32 -passes=ventus-always-inline %s | FileCheck --check-prefix=ALL %s
|
||||
; RUN: opt -S -mtriple=riscv32 -ventus-stress-function-calls -passes=ventus-always-inline %s | FileCheck --check-prefix=ALL %s
|
||||
|
||||
@local0 = addrspace(3) global i32 undef, align 4
|
||||
@local1 = addrspace(3) global [512 x i32] undef, align 4
|
||||
@nested.local.address = addrspace(1) global ptr addrspace(3) @local0, align 4
|
||||
|
||||
@alias.local0 = alias i32, ptr addrspace(3) @local0
|
||||
@local.cycle = addrspace(3) global i32 ptrtoint (ptr addrspace(3) @local.cycle to i32), align 4
|
||||
|
||||
|
||||
; ALL-LABEL: define i32 @load_local_simple() #0 {
|
||||
define i32 @load_local_simple() {
|
||||
%load = load i32, ptr addrspace(3) @local0, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @load_local_const_gep() #0 {
|
||||
define i32 @load_local_const_gep() {
|
||||
%load = load i32, ptr addrspace(3) getelementptr inbounds ([512 x i32], ptr addrspace(3) @local1, i64 0, i64 4), align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @load_local_var_gep(i32 %idx) #0 {
|
||||
define i32 @load_local_var_gep(i32 %idx) {
|
||||
%gep = getelementptr inbounds [512 x i32], ptr addrspace(3) @local1, i32 0, i32 %idx
|
||||
%load = load i32, ptr addrspace(3) %gep, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define ptr addrspace(3) @load_nested_address(i32 %idx) #0 {
|
||||
define ptr addrspace(3) @load_nested_address(i32 %idx) {
|
||||
%load = load ptr addrspace(3), ptr addrspace(1) @nested.local.address, align 4
|
||||
ret ptr addrspace(3) %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @load_local_alias() #0 {
|
||||
define i32 @load_local_alias() {
|
||||
%load = load i32, ptr addrspace(3) @alias.local0, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @load_local_cycle() #0 {
|
||||
define i32 @load_local_cycle() {
|
||||
%load = load i32, ptr addrspace(3) @local.cycle, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i1 @icmp_local_address() #0 {
|
||||
define i1 @icmp_local_address() {
|
||||
ret i1 icmp eq (ptr addrspace(3) @local0, ptr addrspace(3) null)
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @transitive_call() #0 {
|
||||
define i32 @transitive_call() {
|
||||
%call = call i32 @load_local_simple()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @recursive_call_local(i32 %arg0) #0 {
|
||||
define i32 @recursive_call_local(i32 %arg0) {
|
||||
%load = load i32, ptr addrspace(3) @local0, align 4
|
||||
%add = add i32 %arg0, %load
|
||||
%call = call i32 @recursive_call_local(i32 %add)
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @load_local_simple_noinline() #0 {
|
||||
define i32 @load_local_simple_noinline() noinline {
|
||||
%load = load i32, ptr addrspace(3) @local0, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; ALL-LABEL: define i32 @recursive_call_local_noinline(i32 %arg0) #0 {
|
||||
define i32 @recursive_call_local_noinline(i32 %arg0) noinline {
|
||||
%load = load i32, ptr addrspace(3) @local0, align 4
|
||||
%add = add i32 %arg0, %load
|
||||
%call = call i32 @recursive_call_local(i32 %add)
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; ALL-LABEL: define ventus_kernel void @kernel_with_local_access(
|
||||
define ventus_kernel void @kernel_with_local_access(ptr addrspace(1) %out) {
|
||||
%load = load i32, ptr addrspace(3) @local0, align 4
|
||||
store i32 %load, ptr addrspace(1) %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; ALL: attributes #0 = { alwaysinline }
|
|
@ -0,0 +1,18 @@
|
|||
; RUN: opt -S -mtriple=riscv32 -passes=ventus-always-inline %s | FileCheck %s
|
||||
|
||||
@internal_alias = internal alias i32 (i32), ptr @original_function
|
||||
@public_alias = alias i32 (i32), ptr @original_function
|
||||
|
||||
define i32 @original_function(i32 %x) {
|
||||
%result = add i32 %x, 42
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define i32 @call_public() {
|
||||
%res = call i32 @public_alias(i32 7)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Aliases are printed before function definitions, so the alias checks come
; first; otherwise the CHECK-NOT never covers the alias definition lines.
; CHECK-NOT: @internal_alias
; CHECK: @public_alias = alias i32 (i32), ptr @original_function
; CHECK-NOT: @internal_alias
; CHECK: define i32 @original_function
; CHECK: call i32 @public_alias(i32 7)
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: opt -mtriple=riscv32 -O1 -S -inline-threshold=1 -ventus-early-inline-all %s | FileCheck %s
|
||||
|
||||
@c_alias = dso_local alias i32 (i32), ptr @callee
|
||||
|
||||
define dso_local i32 @callee(i32 %x) {
|
||||
entry:
|
||||
%mul1 = mul i32 %x, %x
|
||||
%mul2 = mul i32 %mul1, %x
|
||||
%mul3 = mul i32 %mul1, %mul2
|
||||
%mul4 = mul i32 %mul3, %mul2
|
||||
%mul5 = mul i32 %mul4, %mul3
|
||||
ret i32 %mul5
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @caller
|
||||
; CHECK: mul i32
|
||||
; CHECK-NOT: call i32
|
||||
|
||||
define ventus_kernel void @caller(i32 %x) {
|
||||
entry:
|
||||
%res = call i32 @callee(i32 %x)
|
||||
store volatile i32 %res, ptr addrspace(1) undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @alias_caller(
|
||||
; CHECK-NOT: call
|
||||
define ventus_kernel void @alias_caller(i32 %x) {
|
||||
entry:
|
||||
%res = call i32 @c_alias(i32 %x)
|
||||
store volatile i32 %res, ptr addrspace(1) undef
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue