Revert "[Coroutines] Only do symmetric transfer if optimization is on"

This reverts commit 7782e080e8. According
to the discussion in WG21, symmetric transfer is a desired feature.
This commit is contained in:
Chuanqi Xu 2022-06-27 10:38:29 +08:00
parent 94460f5136
commit 24e53b01d5
20 changed files with 35 additions and 38 deletions

View File

@ -22,14 +22,14 @@
namespace llvm {
struct CoroSplitPass : PassInfoMixin<CoroSplitPass> {
CoroSplitPass(bool Optimizing = false) : Optimizing(Optimizing) {}
CoroSplitPass(bool OptimizeFrame = false) : OptimizeFrame(OptimizeFrame) {}
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
static bool isRequired() { return true; }
// Would be true if the Optimization level isn't O0.
bool Optimizing;
bool OptimizeFrame;
};
} // end namespace llvm

View File

@ -601,7 +601,7 @@ Expected<bool> parseInlinerPassOptions(StringRef Params) {
}
Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "optimizing", "CoroSplitPass");
return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass");
}
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {

View File

@ -192,7 +192,7 @@ CGSCC_PASS_WITH_PARAMS("coro-split",
return CoroSplitPass(OptimizeFrame);
},
parseCoroSplitPassOptions,
"optimizing")
"reuse-storage")
#undef CGSCC_PASS_WITH_PARAMS
#ifndef FUNCTION_ANALYSIS

View File

@ -608,7 +608,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
}
});
if (!Shape.Optimizing) {
if (!Shape.OptimizeFrame) {
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
@ -1692,14 +1692,14 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
&*Builder.GetInsertPoint());
// This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions.
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
}
}
// Salvage debug info on any dbg.addr that we see. We do not insert them
// into each block where we have a use though.
if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.Optimizing);
coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame);
}
// If we have a single edge PHINode, remove it and replace it with a
@ -2548,7 +2548,7 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
void coro::salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
DbgVariableIntrinsic *DVI, bool Optimizing) {
DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
Function *F = DVI->getFunction();
IRBuilder<> Builder(F->getContext());
auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
@ -2601,7 +2601,7 @@ void coro::salvageDebugInfo(
//
// Avoid creating an alloca that would be eliminated by optimization
// passes, which would leave the corresponding dbg.declares invalid.
if (!Optimizing)
if (!OptimizeFrame)
if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
auto &Cached = DbgPtrAllocaCache[Storage];
if (!Cached) {

View File

@ -31,7 +31,7 @@ void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
/// holding a pointer to the coroutine frame.
void salvageDebugInfo(
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
DbgVariableIntrinsic *DVI, bool Optimizing);
DbgVariableIntrinsic *DVI, bool OptimizeFrame);
// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {
@ -104,7 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
BasicBlock *AllocaSpillBlock;
/// This would only be true if optimizations are enabled.
bool Optimizing;
bool OptimizeFrame;
struct SwitchLoweringStorage {
SwitchInst *ResumeSwitch;
@ -255,8 +255,8 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
Shape() = default;
explicit Shape(Function &F, bool Optimizing = false)
: Optimizing(Optimizing) {
explicit Shape(Function &F, bool OptimizeFrame = false)
: OptimizeFrame(OptimizeFrame) {
buildFrom(F);
}
void buildFrom(Function &F);

View File

@ -683,7 +683,7 @@ void CoroCloner::salvageDebugInfo() {
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
Worklist.push_back(DVI);
for (DbgVariableIntrinsic *DVI : Worklist)
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.Optimizing);
coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame);
// Remove all salvaged dbg.declare intrinsics that became
// either unreachable or stale due to the CoroSplit transformation.
@ -1351,8 +1351,8 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
}
// Add musttail to any resume instructions that is immediately followed by a
// suspend (i.e. ret) to implement symmetric transfer. We wouldn't do this in
// O0 since symmetric transfer is not part of standard now.
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
// This transformation is done only in the resume part of the coroutine that has
// identical signature and calling convention as the coro.resume call.
static void addMustTailToCoroResumes(Function &F) {
@ -1580,10 +1580,7 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone);
// Prepare to do symmetric transfer. We only do this if optimization is
// enabled since the symmetric transfer is not part of the C++ standard now.
if (Shape.Optimizing)
addMustTailToCoroResumes(*ResumeClone);
addMustTailToCoroResumes(*ResumeClone);
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
@ -1888,14 +1885,14 @@ namespace {
static coro::Shape splitCoroutine(Function &F,
SmallVectorImpl<Function *> &Clones,
bool Optimizing) {
bool OptimizeFrame) {
PrettyStackTraceFunction prettyStackTrace(F);
// The suspend-crossing algorithm in buildCoroutineFrame get tripped
// up by uses in unreachable blocks, so remove them as a first pass.
removeUnreachableBlocks(F);
coro::Shape Shape(F, Optimizing);
coro::Shape Shape(F, OptimizeFrame);
if (!Shape.CoroBegin)
return Shape;
@ -1944,7 +1941,7 @@ static coro::Shape splitCoroutine(Function &F,
}
}
for (auto *DDI : Worklist)
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.Optimizing);
coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
return Shape;
}
@ -2087,7 +2084,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
F.setSplittedCoroutine();
SmallVector<Function *, 4> Clones;
const coro::Shape Shape = splitCoroutine(F, Clones, Optimizing);
const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
if (!Shape.CoroSuspends.empty()) {

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<optimizing>),function(sroa)' -S | FileCheck %s
; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split<reuse-storage>),function(sroa)' -S | FileCheck %s
; Checks whether the dbg.declare for `__promise` remains valid under O2.

View File

@ -1,5 +1,5 @@
; Check that we can handle spills of array allocas
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
%struct.big_structure = type { [500 x i8] }
declare void @consume(%struct.big_structure*)

View File

@ -1,6 +1,6 @@
; Tests that variables in a Coroutine whose lifetime range is not overlapping each other
; re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type in a Coroutine whose lifetime range is not overlapping each other
; re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type with incompatible alignment in a Coroutine whose lifetime
; range is not overlapping each other should not re-use the same slot in Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that variables of different type with incompatible alignment in a Coroutine whose
; lifetime range is not overlapping each other re-use the same slot in the Coroutine frame.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split<reuse-storage>),simplifycfg,early-cse' -S | FileCheck %s
%"struct.task::promise_type" = type { i8 }
%struct.awaitable = type { i8 }
%struct.big_structure = type { [500 x i8] }

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define void @fakeresume1(i8*) {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define void @f() #0 {
entry:

View File

@ -1,6 +1,6 @@
; Tests that coro-split will convert a call before coro.suspend to a musttail call
; while the user of the coro.suspend is an icmpinst.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
define void @fakeresume1(i8*) {
entry:

View File

@ -1,6 +1,6 @@
; Tests that sinked lifetime markers wouldn't prevent optimization
; to convert a resuming call to a musttail call.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8)

View File

@ -3,7 +3,7 @@
; The difference between this and coro-split-musttail5.ll is that there is
; an extra bitcast instruction in the path, which makes it harder to
; optimize.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8)

View File

@ -3,7 +3,7 @@
; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
; is that this contains dead instruction generated during the transformation,
; which makes the optimization harder.
; RUN: opt < %s -passes='cgscc(coro-split<optimizing>),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
declare void @fakeresume1(i64* align 8)