mirror of https://github.com/microsoft/clang.git
[CodeGen] Emit parallel_loop_access for each loop in the loop stack.
Summary: Emit !llvm.mem.parallel_loop_access metadata for memory accesses even if the parallel loop is not at the top of the loop stack. Fixes llvm.org/PR37558. Reviewers: ABataev, hfinkel, amusman, tyler.nowicki Reviewed By: hfinkel Subscribers: Meinersbur, hfinkel, cfe-commits Differential Revision: https://reviews.llvm.org/D48808 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@338810 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9824aedd2e
commit
f44b5cff85
|
@ -344,6 +344,17 @@ void LoopInfoStack::InsertHelper(Instruction *I) const {
|
|||
return;
|
||||
}
|
||||
|
||||
if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory())
|
||||
I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID());
|
||||
if (I->mayReadOrWriteMemory()) {
|
||||
SmallVector<Metadata *, 2> ParallelLoopIDs;
|
||||
for (const LoopInfo &AL : Active)
|
||||
if (AL.getAttributes().IsParallel)
|
||||
ParallelLoopIDs.push_back(AL.getLoopID());
|
||||
|
||||
MDNode *ParallelMD = nullptr;
|
||||
if (ParallelLoopIDs.size() == 1)
|
||||
ParallelMD = cast<MDNode>(ParallelLoopIDs[0]);
|
||||
else if (ParallelLoopIDs.size() >= 2)
|
||||
ParallelMD = MDNode::get(I->getContext(), ParallelLoopIDs);
|
||||
I->setMetadata("llvm.mem.parallel_loop_access", ParallelMD);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
// Verify that the inner access is tagged with a parallel_loop_access
|
||||
// for the inner and outer loop using a list.
|
||||
// Test input: a doubly nested loop in which both the outer (i) loop and the
// inner (j) loop carry `#pragma clang loop vectorize(assume_safety)`.
// The single store in the inner body is the memory access that the FileCheck
// directives following this function inspect.
void vectorize_nested_test(int *List, int Length) {
|
||||
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
|
||||
for (int i = 0; i < Length; ++i) {
|
||||
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
|
||||
for (int j = 0; j < Length; ++j)
|
||||
List[i * Length + j] = (i + j) * 2; // store enclosed by both annotated loops
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: %[[MUL:.+]] = mul
|
||||
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.mem.parallel_loop_access ![[PARALLEL_LIST:[0-9]+]]
|
||||
// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
|
||||
// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID:[0-9]+]]
|
||||
|
||||
// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]],
|
||||
// CHECK: ![[PARALLEL_LIST]] = !{![[OUTER_LOOPID]], ![[INNER_LOOPID]]}
|
||||
// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]],
|
|
@ -0,0 +1,20 @@
|
|||
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
// Verify that the inner access is tagged with a parallel_loop_access
|
||||
// for the outer loop.
|
||||
// Test input: only the outer (i) loop carries
// `#pragma clang loop vectorize(assume_safety)`; the inner (j) loop is
// annotated with `unroll(full)` alone. The single store in the inner body is
// the memory access that the FileCheck directives following this function
// inspect.
void vectorize_outer_test(int *List, int Length) {
|
||||
#pragma clang loop vectorize(assume_safety) interleave(disable) unroll(disable)
|
||||
for (int i = 0; i < Length; i += 2) {
|
||||
#pragma clang loop unroll(full)
|
||||
for (int j = 0; j < 2; j += 1)
|
||||
List[i + j] = (i + j) * 2; // store nested inside the assume_safety outer loop
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: %[[MUL:.+]] = mul
|
||||
// CHECK: store i32 %[[MUL]], i32* %{{.+}}, !llvm.mem.parallel_loop_access ![[OUTER_LOOPID:[0-9]+]]
|
||||
// CHECK: br label %{{.+}}, !llvm.loop ![[INNER_LOOPID:[0-9]+]]
|
||||
// CHECK: br label %{{.+}}, !llvm.loop ![[OUTER_LOOPID]]
|
||||
|
||||
// CHECK: ![[OUTER_LOOPID]] = distinct !{![[OUTER_LOOPID]],
|
||||
// CHECK: ![[INNER_LOOPID]] = distinct !{![[INNER_LOOPID]],
|
Loading…
Reference in New Issue