[Polly] Don't generate inter-iteration noalias metadata.
This metadata was intended to mark all accesses within an iteration to be pairwise non-aliasing, in this case because every memory of a base pointer is touched (read or write) at most once. This is typical for 'sweeps' over all data. The stated motivation from D30606 is to ensure that unrolled iterations are considered non-aliasing. Rhe implemention had multiple issues: * The structure of the noalias metadata was malformed. D110026 added check in the verifier for this metadata, and the tests were failing since then. * This is not true for the outer loops of the BLIS matrix multiplication, where it was being inserted. Each element of A, B, C is accessed multiple times, as often as the loop not used as an index is iterating. * Scopes were added to SecondLevelOtherAliasScopeList (used for the !noalias scop list) on-the-fly when another SCEV was seen. This meant that previously visited instructions would not be updated with alias scopes that are only seen later, missing out those SCEVs they should not be aliasing with. * Since the !noalias scope list would ideally consists of all other SCEV for this base pointer, we might run quickly into scalability issues. Especially after unrolling there would probably at least once SCEV per instruction and unroll instance. * The inter-iteration noalias base pointer was not removed after leaving the loop marked with it, effectively marking everything after it to noalias as well. A solution I considered was to mark each instruction as non-aliasing with its own scope. The instruction itself would obviously alias itself, but such construction might also be considered invalid. Duplicating the instruction (e.g. due to speculation) would mark the instruction non-aliasing with its clone. I don't want to go into this territory, especially since the original motivation of determining unrolled instances as noalias based on SCEV is the what scev-aa does as well. This effectively reverts D30606 and D35761.
This commit is contained in:
parent
a06db78fd9
commit
cad9f98a2a
|
@ -82,9 +82,6 @@ public:
|
|||
/// Delete the set of alternative alias bases
|
||||
void resetAlternativeAliasBases() { AlternativeAliasBases.clear(); }
|
||||
|
||||
/// Add inter iteration alias-free base pointer @p BasePtr.
|
||||
void addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr);
|
||||
|
||||
/// Stack for surrounding BandAttr annotations.
|
||||
llvm::SmallVector<BandAttr *, 8> LoopAttrEnv;
|
||||
BandAttr *&getStagingAttrEnv() { return LoopAttrEnv.back(); }
|
||||
|
@ -93,16 +90,6 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
/// Annotate with the second level alias metadata
|
||||
///
|
||||
/// Annotate the instruction @p I with the second level alias metadata
|
||||
/// to distinguish the individual non-aliasing accesses that have inter
|
||||
/// iteration alias-free base pointers.
|
||||
///
|
||||
/// @param I The instruction to be annotated.
|
||||
/// @param BasePtr The base pointer of @p I.
|
||||
void annotateSecondLevel(llvm::Instruction *I, llvm::Value *BasePtr);
|
||||
|
||||
/// The ScalarEvolution analysis we use to find base pointers.
|
||||
llvm::ScalarEvolution *SE;
|
||||
|
||||
|
@ -122,16 +109,6 @@ private:
|
|||
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
|
||||
OtherAliasScopeListMap;
|
||||
|
||||
/// A map from pointers to second level alias scopes.
|
||||
llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *> SecondLevelAliasScopeMap;
|
||||
|
||||
/// A map from pointers to second level alias scope list of other pointers.
|
||||
llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *>
|
||||
SecondLevelOtherAliasScopeListMap;
|
||||
|
||||
/// Inter iteration alias-free base pointers.
|
||||
llvm::SmallPtrSet<llvm::Value *, 4> InterIterationAliasFreeBasePtrs;
|
||||
|
||||
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::AssertingVH<llvm::Value>>
|
||||
AlternativeAliasBases;
|
||||
};
|
||||
|
|
|
@ -186,40 +186,6 @@ static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
|
|||
return MemInst.getPointerOperand();
|
||||
}
|
||||
|
||||
void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
|
||||
llvm::Value *BasePtr) {
|
||||
Value *Ptr = getMemAccInstPointerOperand(Inst);
|
||||
if (!Ptr)
|
||||
return;
|
||||
|
||||
auto *PtrSCEV = SE->getSCEV(Ptr);
|
||||
auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
|
||||
|
||||
auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(PtrSCEV);
|
||||
auto SecondLevelOtherAliasScopeList =
|
||||
SecondLevelOtherAliasScopeListMap.lookup(PtrSCEV);
|
||||
if (!SecondLevelAliasScope) {
|
||||
auto AliasScope = AliasScopeMap.lookup(BasePtr);
|
||||
if (!AliasScope)
|
||||
return;
|
||||
LLVMContext &Ctx = SE->getContext();
|
||||
SecondLevelAliasScope = getID(
|
||||
Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
|
||||
SecondLevelAliasScopeMap[PtrSCEV] = SecondLevelAliasScope;
|
||||
Metadata *Args = {SecondLevelAliasScope};
|
||||
auto SecondLevelBasePtrAliasScopeList =
|
||||
SecondLevelAliasScopeMap.lookup(BasePtrSCEV);
|
||||
SecondLevelAliasScopeMap[BasePtrSCEV] = MDNode::concatenate(
|
||||
SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
|
||||
auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
|
||||
SecondLevelOtherAliasScopeList = MDNode::concatenate(
|
||||
OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
|
||||
SecondLevelOtherAliasScopeListMap[PtrSCEV] = SecondLevelOtherAliasScopeList;
|
||||
}
|
||||
Inst->setMetadata("alias.scope", SecondLevelAliasScope);
|
||||
Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
|
||||
}
|
||||
|
||||
/// Find the base pointer of an array access.
|
||||
///
|
||||
/// This should be equivalent to ScalarEvolution::getPointerBase, which we
|
||||
|
@ -299,18 +265,6 @@ void ScopAnnotator::annotate(Instruction *Inst) {
|
|||
"BasePtr either expected in AliasScopeMap and OtherAlias...Map");
|
||||
auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];
|
||||
|
||||
if (InterIterationAliasFreeBasePtrs.count(BasePtr)) {
|
||||
annotateSecondLevel(Inst, BasePtr);
|
||||
return;
|
||||
}
|
||||
|
||||
Inst->setMetadata("alias.scope", MDNode::get(SE->getContext(), AliasScope));
|
||||
Inst->setMetadata("noalias", OtherAliasScopeList);
|
||||
}
|
||||
|
||||
void ScopAnnotator::addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr) {
|
||||
if (!BasePtr)
|
||||
return;
|
||||
|
||||
InterIterationAliasFreeBasePtrs.insert(BasePtr);
|
||||
}
|
||||
|
|
|
@ -423,10 +423,6 @@ void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
|
|||
isl_id_free(Id);
|
||||
return;
|
||||
}
|
||||
if (strcmp(isl_id_get_name(Id), "Inter iteration alias-free") == 0) {
|
||||
auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
|
||||
Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
|
||||
}
|
||||
|
||||
BandAttr *ChildLoopAttr = getLoopAttr(isl::manage_copy(Id));
|
||||
BandAttr *AncestorLoopAttr;
|
||||
|
|
|
@ -915,20 +915,6 @@ isolateAndUnrollMatMulInnerLoops(isl::schedule_node Node,
|
|||
return Node;
|
||||
}
|
||||
|
||||
/// Mark @p BasePtr with "Inter iteration alias-free" mark node.
|
||||
///
|
||||
/// @param Node The child of the mark node to be inserted.
|
||||
/// @param BasePtr The pointer to be marked.
|
||||
/// @return The modified isl_schedule_node.
|
||||
static isl::schedule_node markInterIterationAliasFree(isl::schedule_node Node,
|
||||
Value *BasePtr) {
|
||||
if (!BasePtr)
|
||||
return Node;
|
||||
|
||||
auto Id = isl::id::alloc(Node.ctx(), "Inter iteration alias-free", BasePtr);
|
||||
return Node.insert_mark(Id).child(0);
|
||||
}
|
||||
|
||||
/// Insert "Loop Vectorizer Disabled" mark node.
|
||||
///
|
||||
/// @param Node The child of the mark node to be inserted.
|
||||
|
@ -970,8 +956,6 @@ static isl::schedule_node optimizeMatMulPattern(isl::schedule_node Node,
|
|||
const TargetTransformInfo *TTI,
|
||||
MatMulInfoTy &MMI) {
|
||||
assert(TTI && "The target transform info should be provided.");
|
||||
Node = markInterIterationAliasFree(
|
||||
Node, MMI.WriteToC->getLatestScopArrayInfo()->getBasePtr());
|
||||
int DimOutNum = isl_schedule_node_band_n_member(Node.get());
|
||||
assert(DimOutNum > 2 && "In case of the matrix multiplication the loop nest "
|
||||
"and, consequently, the corresponding scheduling "
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, -32 * c1 + 2999); c3 += 1)
|
||||
; CHECK-NEXT: Stmt_for_body3(32 * c0 + c2, 32 * c1 + c3);
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: // Inter iteration alias-free
|
||||
; CHECK-NEXT: // Register tiling - Tiles
|
||||
; CHECK-NEXT: for (int c0 = 0; c0 <= 23; c0 += 1)
|
||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 2999; c1 += 1)
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
|
||||
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: // Inter iteration alias-free
|
||||
; CHECK-NEXT: // 1st level tiling - Tiles
|
||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
|
||||
; CHECK-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
; RUN: opt %loadPolly -polly-opt-isl -polly-invariant-load-hoisting=true \
|
||||
; RUN: -polly-pattern-matching-based-opts=true \
|
||||
; RUN: -polly-target-throughput-vector-fma=1 \
|
||||
; RUN: -polly-target-latency-vector-fma=1 \
|
||||
; RUN: -polly-codegen -polly-target-1st-cache-level-associativity=8 \
|
||||
; RUN: -polly-target-2nd-cache-level-associativity=8 \
|
||||
; RUN: -polly-target-1st-cache-level-size=32768 \
|
||||
; RUN: -polly-target-vector-register-bitwidth=256 \
|
||||
; RUN: -polly-target-2nd-cache-level-size=262144 -S < %s \
|
||||
; RUN: | FileCheck %s
|
||||
;
|
||||
; This test case checks whether Polly generates second level alias metadata
|
||||
; to distinguish the specific accesses in case of the ublas gemm kernel.
|
||||
;
|
||||
; CHECK: !16 = distinct !{!16, !1, !"second level alias metadata"}
|
||||
; CHECK: !17 = distinct !{!17, !1, !"second level alias metadata"}
|
||||
; CHECK: !18 = !{!4, !5, !6, !7, !16}
|
||||
; CHECK: !19 = distinct !{!19, !1, !"second level alias metadata"}
|
||||
; CHECK: !20 = !{!4, !5, !6, !7, !16, !17}
|
||||
; CHECK: !21 = distinct !{!21, !1, !"second level alias metadata"}
|
||||
; CHECK: !22 = !{!4, !5, !6, !7, !16, !17, !19}
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) {
|
||||
bb:
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb29, %bb
|
||||
%tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
|
||||
br label %bb9
|
||||
|
||||
bb9: ; preds = %bb26, %bb8
|
||||
%tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
|
||||
%tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
|
||||
%tmp12 = load double, double* %tmp11, align 8
|
||||
%tmp13 = fmul double %tmp12, %arg4
|
||||
store double %tmp13, double* %tmp11, align 8
|
||||
br label %Copy_0
|
||||
|
||||
Copy_0: ; preds = %Copy_0, %bb9
|
||||
%tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
|
||||
%tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
|
||||
%tmp17 = load double, double* %tmp16, align 8
|
||||
%tmp18 = fmul double %tmp17, %arg3
|
||||
%tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
|
||||
%tmp20 = load double, double* %tmp19, align 8
|
||||
%tmp21 = fmul double %tmp18, %tmp20
|
||||
%tmp22 = load double, double* %tmp11, align 8
|
||||
%tmp23 = fadd double %tmp22, %tmp21
|
||||
store double %tmp23, double* %tmp11, align 8
|
||||
%tmp24 = add nuw nsw i64 %tmp15, 1
|
||||
%tmp25 = icmp ne i64 %tmp24, 1024
|
||||
br i1 %tmp25, label %Copy_0, label %bb26
|
||||
|
||||
bb26: ; preds = %Copy_0
|
||||
%tmp27 = add nuw nsw i64 %tmp10, 1
|
||||
%tmp28 = icmp ne i64 %tmp27, 1056
|
||||
br i1 %tmp28, label %bb9, label %bb29
|
||||
|
||||
bb29: ; preds = %bb26
|
||||
%tmp30 = add nuw nsw i64 %tmp, 1
|
||||
%tmp31 = icmp ne i64 %tmp30, 1056
|
||||
br i1 %tmp31, label %bb8, label %bb32
|
||||
|
||||
bb32: ; preds = %bb29
|
||||
ret void
|
||||
}
|
|
@ -10,8 +10,7 @@
|
|||
;
|
||||
; Test whether isolation works as expected.
|
||||
;
|
||||
; CHECK: // Inter iteration alias-free
|
||||
; CHECK-NEXT: // 1st level tiling - Tiles
|
||||
; CHECK: // 1st level tiling - Tiles
|
||||
; CHECK-NEXT: for (int c0 = 0; c0 <= 1; c0 += 1)
|
||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 6; c1 += 1) {
|
||||
; CHECK-NEXT: for (int c3 = 1536 * c0; c3 <= min(1999, 1536 * c0 + 1535); c3 += 1)
|
||||
|
|
|
@ -9,12 +9,8 @@
|
|||
; RUN: -polly-import-jscop-postfix=transformed -polly-codegen -S < %s \
|
||||
; RUN: | FileCheck %s
|
||||
;
|
||||
; Check that we do not create different alias sets for locations represented by
|
||||
; different raw pointers.
|
||||
; Check that we disable the Loop Vectorizer.
|
||||
;
|
||||
; Also check that we disable the Loop Vectorizer.
|
||||
;
|
||||
; CHECK-NOT: !76 = distinct !{!76, !5, !"second level alias metadata"}
|
||||
; CHECK: !{!"llvm.loop.vectorize.enable", i1 false}
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
|
||||
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: // Inter iteration alias-free
|
||||
; CHECK-NEXT: // Register tiling - Tiles
|
||||
; CHECK-NEXT: for (int c0 = 0; c0 <= 131; c0 += 1)
|
||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 263; c1 += 1)
|
||||
|
@ -87,7 +86,6 @@
|
|||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
|
||||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
|
||||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: }
|
||||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Inter iteration alias-free
|
||||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Tiles
|
||||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
|
||||
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
|
||||
|
|
|
@ -37,7 +37,6 @@
|
|||
; C[i][j] += A[i][k] * B[k][j];
|
||||
;
|
||||
; CHECK: if (ni >= 1) {
|
||||
; CHECK-NEXT: // Inter iteration alias-free
|
||||
; CHECK-NEXT: // 1st level tiling - Tiles
|
||||
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1)
|
||||
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) {
|
||||
|
|
Loading…
Reference in New Issue