[LICM] Disable thread-safety checks in single-thread model
If the single-thread model is used, or the -licm-force-thread-model-single flag is specified, skip checks related to thread-safety. This means that store promotion for conditionally executed stores only requires proof of dereferenceability and writability, but not of thread-safety. For example, this enables promotion of stores to (non-constant) globals, as well as captured allocas. Fixes https://github.com/llvm/llvm-project/issues/50537. Differential Revision: https://reviews.llvm.org/D130466
This commit is contained in:
parent
deb82d4a20
commit
b920407cf5
|
@ -372,6 +372,8 @@ public:
|
|||
|
||||
unsigned getAssumedAddrSpace(const Value *V) const;
|
||||
|
||||
/// \returns true if the target reports the single-thread model. Clients such
/// as LICM use this to skip thread-safety checks.
bool isSingleThreaded() const;
|
||||
|
||||
std::pair<const Value *, unsigned>
|
||||
getPredicatedAddrSpace(const Value *V) const;
|
||||
|
||||
|
@ -1581,6 +1583,7 @@ public:
|
|||
virtual bool
|
||||
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
|
||||
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
|
||||
virtual bool isSingleThreaded() const = 0;
|
||||
virtual std::pair<const Value *, unsigned>
|
||||
getPredicatedAddrSpace(const Value *V) const = 0;
|
||||
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
|
||||
|
@ -1959,6 +1962,8 @@ public:
|
|||
return Impl.getAssumedAddrSpace(V);
|
||||
}
|
||||
|
||||
/// Forwards the single-thread query to the wrapped implementation object.
bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
|
||||
|
||||
std::pair<const Value *, unsigned>
|
||||
getPredicatedAddrSpace(const Value *V) const override {
|
||||
return Impl.getPredicatedAddrSpace(V);
|
||||
|
|
|
@ -108,6 +108,8 @@ public:
|
|||
|
||||
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
|
||||
|
||||
/// Default answer: always false, i.e. this implementation never claims the
/// single-thread model.
bool isSingleThreaded() const { return false; }
|
||||
|
||||
std::pair<const Value *, unsigned>
|
||||
getPredicatedAddrSpace(const Value *V) const {
|
||||
return std::make_pair(nullptr, -1);
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include "llvm/Support/MachineValueType.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
@ -287,6 +288,11 @@ public:
|
|||
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
|
||||
}
|
||||
|
||||
bool isSingleThreaded() const {
|
||||
return getTLI()->getTargetMachine().Options.ThreadModel ==
|
||||
ThreadModel::Single;
|
||||
}
|
||||
|
||||
std::pair<const Value *, unsigned>
|
||||
getPredicatedAddrSpace(const Value *V) const {
|
||||
return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
|
||||
|
|
|
@ -210,8 +210,9 @@ bool promoteLoopAccessesToScalars(
|
|||
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
|
||||
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
|
||||
PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
|
||||
const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
|
||||
OptimizationRemarkEmitter *, bool AllowSpeculation);
|
||||
const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
|
||||
MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
|
||||
bool AllowSpeculation);
|
||||
|
||||
/// Does a BFS from a given node to all of its children inside a given loop.
|
||||
/// The returned vector of nodes includes the starting point.
|
||||
|
|
|
@ -273,6 +273,10 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
|
|||
return TTIImpl->getAssumedAddrSpace(V);
|
||||
}
|
||||
|
||||
// Forwards the single-thread query to the underlying TTI implementation.
bool TargetTransformInfo::isSingleThreaded() const {
|
||||
return TTIImpl->isSingleThreaded();
|
||||
}
|
||||
|
||||
std::pair<const Value *, unsigned>
|
||||
TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
|
||||
return TTIImpl->getPredicatedAddrSpace(V);
|
||||
|
|
|
@ -76,6 +76,7 @@
|
|||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
|
@ -112,6 +113,10 @@ static cl::opt<bool> ControlFlowHoisting(
|
|||
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
|
||||
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
|
||||
|
||||
// Override knob (off by default): when set, LICM treats every object as
// thread-local, exactly as if TTI reported the single-thread model.
static cl::opt<bool>
|
||||
SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
|
||||
cl::desc("Force thread model single in LICM pass"));
|
||||
|
||||
static cl::opt<uint32_t> MaxNumUsesTraversed(
|
||||
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
|
||||
cl::desc("Max num uses visited for identifying load "
|
||||
|
@ -489,7 +494,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
|
|||
collectPromotionCandidates(MSSA, AA, L)) {
|
||||
LocalPromoted |= promoteLoopAccessesToScalars(
|
||||
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
|
||||
DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
|
||||
DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
|
||||
LicmAllowSpeculation);
|
||||
}
|
||||
Promoted |= LocalPromoted;
|
||||
} while (LocalPromoted);
|
||||
|
@ -1911,17 +1917,21 @@ bool isWritableObject(const Value *Object) {
|
|||
if (auto *A = dyn_cast<Argument>(Object))
|
||||
return A->hasByValAttr();
|
||||
|
||||
if (auto *G = dyn_cast<GlobalVariable>(Object))
|
||||
return !G->isConstant();
|
||||
|
||||
// TODO: Noalias has nothing to do with writability, this should check for
|
||||
// an allocator function.
|
||||
return isNoAliasCall(Object);
|
||||
}
|
||||
|
||||
bool isThreadLocalObject(const Value *Object, const Loop *L,
|
||||
DominatorTree *DT) {
|
||||
bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
|
||||
TargetTransformInfo *TTI) {
|
||||
// The object must be function-local to start with, and then not captured
|
||||
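// before/in the loop. In the single-thread model (TTI->isSingleThreaded() or
// -licm-force-thread-model-single) this requirement is waived and every
// object is treated as thread-local.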
|
||||
return isIdentifiedFunctionLocal(Object) &&
|
||||
isNotCapturedBeforeOrInLoop(Object, L, DT);
|
||||
return (isIdentifiedFunctionLocal(Object) &&
|
||||
isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
|
||||
(TTI->isSingleThreaded() || SingleThread);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -1937,9 +1947,9 @@ bool llvm::promoteLoopAccessesToScalars(
|
|||
SmallVectorImpl<Instruction *> &InsertPts,
|
||||
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
|
||||
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
|
||||
const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU,
|
||||
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE,
|
||||
bool AllowSpeculation) {
|
||||
const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
|
||||
MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
|
||||
// Verify inputs.
|
||||
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
|
||||
SafetyInfo != nullptr &&
|
||||
|
@ -2150,7 +2160,8 @@ bool llvm::promoteLoopAccessesToScalars(
|
|||
// violating the memory model.
|
||||
if (StoreSafety == StoreSafetyUnknown) {
|
||||
Value *Object = getUnderlyingObject(SomePtr);
|
||||
if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT))
|
||||
if (isWritableObject(Object) &&
|
||||
isThreadLocalObject(Object, CurLoop, DT, TTI))
|
||||
StoreSafety = StoreSafe;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -licm < %s | FileCheck %s
|
||||
; RUN: opt -S -licm < %s | FileCheck %s --check-prefixes=CHECK,MT
|
||||
; RUN: opt -S -licm -licm-force-thread-model-single < %s | FileCheck %s --check-prefixes=CHECK,ST
|
||||
|
||||
@g = external global i32
|
||||
@c = external constant i32
|
||||
|
@ -10,22 +11,40 @@ declare void @capture(ptr)
|
|||
; mode only loads can be promoted, as a different thread might write to the
|
||||
; global.
|
||||
define void @promote_global(i1 %c, i1 %c2) {
|
||||
; CHECK-LABEL: @promote_global(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
|
||||
; CHECK: if:
|
||||
; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
|
||||
; CHECK-NEXT: store i32 [[V_INC]], ptr @g, align 4
|
||||
; CHECK-NEXT: br label [[LATCH]]
|
||||
; CHECK: latch:
|
||||
; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
|
||||
; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
; MT-LABEL: @promote_global(
|
||||
; MT-NEXT: entry:
|
||||
; MT-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
|
||||
; MT-NEXT: br label [[LOOP:%.*]]
|
||||
; MT: loop:
|
||||
; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
|
||||
; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
|
||||
; MT: if:
|
||||
; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
|
||||
; MT-NEXT: store i32 [[V_INC]], ptr @g, align 4
|
||||
; MT-NEXT: br label [[LATCH]]
|
||||
; MT: latch:
|
||||
; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
|
||||
; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; MT: exit:
|
||||
; MT-NEXT: ret void
|
||||
;
|
||||
; ST-LABEL: @promote_global(
|
||||
; ST-NEXT: entry:
|
||||
; ST-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
|
||||
; ST-NEXT: br label [[LOOP:%.*]]
|
||||
; ST: loop:
|
||||
; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
|
||||
; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
|
||||
; ST: if:
|
||||
; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
|
||||
; ST-NEXT: br label [[LATCH]]
|
||||
; ST: latch:
|
||||
; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
|
||||
; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; ST: exit:
|
||||
; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
|
||||
; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr @g, align 4
|
||||
; ST-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -87,24 +106,44 @@ exit:
|
|||
; mode only loads can be promoted, as a different thread might write to the
|
||||
; captured alloca.
|
||||
define void @promote_captured_alloca(i1 %c, i1 %c2) {
|
||||
; CHECK-LABEL: @promote_captured_alloca(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @capture(ptr [[A]])
|
||||
; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
|
||||
; CHECK: if:
|
||||
; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
|
||||
; CHECK-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
|
||||
; CHECK-NEXT: br label [[LATCH]]
|
||||
; CHECK: latch:
|
||||
; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
|
||||
; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
; MT-LABEL: @promote_captured_alloca(
|
||||
; MT-NEXT: entry:
|
||||
; MT-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; MT-NEXT: call void @capture(ptr [[A]])
|
||||
; MT-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
|
||||
; MT-NEXT: br label [[LOOP:%.*]]
|
||||
; MT: loop:
|
||||
; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
|
||||
; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
|
||||
; MT: if:
|
||||
; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
|
||||
; MT-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
|
||||
; MT-NEXT: br label [[LATCH]]
|
||||
; MT: latch:
|
||||
; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
|
||||
; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; MT: exit:
|
||||
; MT-NEXT: ret void
|
||||
;
|
||||
; ST-LABEL: @promote_captured_alloca(
|
||||
; ST-NEXT: entry:
|
||||
; ST-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; ST-NEXT: call void @capture(ptr [[A]])
|
||||
; ST-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
|
||||
; ST-NEXT: br label [[LOOP:%.*]]
|
||||
; ST: loop:
|
||||
; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
|
||||
; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
|
||||
; ST: if:
|
||||
; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
|
||||
; ST-NEXT: br label [[LATCH]]
|
||||
; ST: latch:
|
||||
; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
|
||||
; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; ST: exit:
|
||||
; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
|
||||
; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr [[A]], align 4
|
||||
; ST-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32
|
||||
|
|
Loading…
Reference in New Issue