[LICM] Disable thread-safety checks in single-thread model

If the single-thread model is used, or the
-licm-force-thread-model-single flag is specified, skip checks
related to thread-safety. This means that store promotion for
conditionally executed stores only requires proof of
dereferenceability and writability, but not of thread-safety. For
example, this enables promotion of stores to (non-constant) globals,
as well as captured allocas.

Fixes https://github.com/llvm/llvm-project/issues/50537.

Differential Revision: https://reviews.llvm.org/D130466
This commit is contained in:
Shubham Narlawar 2022-10-10 16:47:14 +02:00 committed by Nikita Popov
parent deb82d4a20
commit b920407cf5
7 changed files with 114 additions and 46 deletions

View File

@ -372,6 +372,8 @@ public:
unsigned getAssumedAddrSpace(const Value *V) const;
bool isSingleThreaded() const;
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const;
@ -1581,6 +1583,7 @@ public:
virtual bool
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
virtual bool isSingleThreaded() const = 0;
virtual std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
@ -1959,6 +1962,8 @@ public:
return Impl.getAssumedAddrSpace(V);
}
/// Forwards to the wrapped TTI implementation's single-thread-model query.
bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override {
return Impl.getPredicatedAddrSpace(V);

View File

@ -108,6 +108,8 @@ public:
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
// Default implementation: conservatively assume the program may be
// multi-threaded, so thread-safety checks are never skipped.
bool isSingleThreaded() const { return false; }
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const {
return std::make_pair(nullptr, -1);

View File

@ -47,6 +47,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@ -287,6 +288,11 @@ public:
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
bool isSingleThreaded() const {
return getTLI()->getTargetMachine().Options.ThreadModel ==
ThreadModel::Single;
}
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const {
return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);

View File

@ -210,8 +210,9 @@ bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *, bool AllowSpeculation);
const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
bool AllowSpeculation);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.

View File

@ -273,6 +273,10 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
// Thin forwarder to the concrete TTI implementation; reports whether the
// target assumes a single-threaded execution environment.
bool TargetTransformInfo::isSingleThreaded() const {
return TTIImpl->isSingleThreaded();
}
std::pair<const Value *, unsigned>
TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
return TTIImpl->getPredicatedAddrSpace(V);

View File

@ -76,6 +76,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@ -112,6 +113,10 @@ static cl::opt<bool> ControlFlowHoisting(
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
static cl::opt<bool>
SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
cl::desc("Force thread model single in LICM pass"));
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
@ -489,7 +494,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@ -1911,17 +1917,21 @@ bool isWritableObject(const Value *Object) {
if (auto *A = dyn_cast<Argument>(Object))
return A->hasByValAttr();
if (auto *G = dyn_cast<GlobalVariable>(Object))
return !G->isConstant();
// TODO: Noalias has nothing to do with writability, this should check for
// an allocator function.
return isNoAliasCall(Object);
}
bool isThreadLocalObject(const Value *Object, const Loop *L,
DominatorTree *DT) {
bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
TargetTransformInfo *TTI) {
// The object must be function-local to start with, and then not captured
// before/in the loop.
return isIdentifiedFunctionLocal(Object) &&
isNotCapturedBeforeOrInLoop(Object, L, DT);
return (isIdentifiedFunctionLocal(Object) &&
isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
(TTI->isSingleThreaded() || SingleThread);
}
} // namespace
@ -1937,9 +1947,9 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU,
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE,
bool AllowSpeculation) {
const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@ -2150,7 +2160,8 @@ bool llvm::promoteLoopAccessesToScalars(
// violating the memory model.
if (StoreSafety == StoreSafetyUnknown) {
Value *Object = getUnderlyingObject(SomePtr);
if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT))
if (isWritableObject(Object) &&
isThreadLocalObject(Object, CurLoop, DT, TTI))
StoreSafety = StoreSafe;
}

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -licm < %s | FileCheck %s
; RUN: opt -S -licm < %s | FileCheck %s --check-prefixes=CHECK,MT
; RUN: opt -S -licm -licm-force-thread-model-single < %s | FileCheck %s --check-prefixes=CHECK,ST
@g = external global i32
@c = external constant i32
@ -10,22 +11,40 @@ declare void @capture(ptr)
; mode only loads can be promoted, as a different thread might write to the
; global.
define void @promote_global(i1 %c, i1 %c2) {
; CHECK-LABEL: @promote_global(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
; CHECK: if:
; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
; CHECK-NEXT: store i32 [[V_INC]], ptr @g, align 4
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
; MT-LABEL: @promote_global(
; MT-NEXT: entry:
; MT-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
; MT-NEXT: br label [[LOOP:%.*]]
; MT: loop:
; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
; MT: if:
; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
; MT-NEXT: store i32 [[V_INC]], ptr @g, align 4
; MT-NEXT: br label [[LATCH]]
; MT: latch:
; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; MT: exit:
; MT-NEXT: ret void
;
; ST-LABEL: @promote_global(
; ST-NEXT: entry:
; ST-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
; ST-NEXT: br label [[LOOP:%.*]]
; ST: loop:
; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
; ST: if:
; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
; ST-NEXT: br label [[LATCH]]
; ST: latch:
; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; ST: exit:
; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr @g, align 4
; ST-NEXT: ret void
;
entry:
br label %loop
@ -87,24 +106,44 @@ exit:
; mode only loads can be promoted, as a different thread might write to the
; captured alloca.
define void @promote_captured_alloca(i1 %c, i1 %c2) {
; CHECK-LABEL: @promote_captured_alloca(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[A]])
; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
; CHECK: if:
; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
; CHECK-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
; MT-LABEL: @promote_captured_alloca(
; MT-NEXT: entry:
; MT-NEXT: [[A:%.*]] = alloca i32, align 4
; MT-NEXT: call void @capture(ptr [[A]])
; MT-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
; MT-NEXT: br label [[LOOP:%.*]]
; MT: loop:
; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
; MT: if:
; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
; MT-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
; MT-NEXT: br label [[LATCH]]
; MT: latch:
; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; MT: exit:
; MT-NEXT: ret void
;
; ST-LABEL: @promote_captured_alloca(
; ST-NEXT: entry:
; ST-NEXT: [[A:%.*]] = alloca i32, align 4
; ST-NEXT: call void @capture(ptr [[A]])
; ST-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
; ST-NEXT: br label [[LOOP:%.*]]
; ST: loop:
; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
; ST: if:
; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
; ST-NEXT: br label [[LATCH]]
; ST: latch:
; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; ST: exit:
; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr [[A]], align 4
; ST-NEXT: ret void
;
entry:
%a = alloca i32