mirror of https://github.com/microsoft/clang.git
[OPENMP, NVPTX] Do not globalize variables with reference/pointer types.
In generic data-sharing mode we do not need to globalize variables/parameters of reference/pointer types. They already are placed in the global memory.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@332380 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent f6d41ea320
commit a48ed89527
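The change is easiest to see from user code. Below is a minimal, hypothetical example (not part of this commit's tests): in generic data-sharing mode the pointer and reference captures no longer receive a slot in the globalized record, since the memory they designate is already visible to every thread.

// Hypothetical example: 'buf' and 'flag' are captured as pointer/reference
// and are now shared as-is, with no globalized copy; a value capture such
// as 'n' is still subject to globalization if it escapes to the workers.
void update(int *buf, int &flag, int n) {
#pragma omp target parallel
  {
    buf[0] += flag; // no data-sharing slot needed for 'buf' or 'flag'
    buf[1] = n;     // 'n' is captured by value and may still be globalized
  }
}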
@@ -220,7 +220,10 @@ class CheckVarsEscapingDeclContext final
                "Parameter captured by value with variably modified type");
         EscapedParameters.insert(VD);
       }
-    }
+    } else if (VD->getType()->isAnyPointerType() ||
+               VD->getType()->isReferenceType())
+      // Do not globalize variables with reference or pointer type.
+      return;
     if (VD->getType()->isVariablyModifiedType())
       EscapedVariableLengthDecls.insert(VD);
     else
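Condensed, the classification order this hunk establishes reads as follows (a sketch using a hypothetical free-standing helper; the real logic is a method of CheckVarsEscapingDeclContext and also handles captured parameters, omitted here):

// Sketch only: bail out for pointers/references before the existing
// variably-modified and fixed-size handling.
void classifyEscapedVar(const ValueDecl *VD) {
  if (VD->getType()->isAnyPointerType() || VD->getType()->isReferenceType())
    return; // already refers to global memory; nothing to globalize
  if (VD->getType()->isVariablyModifiedType())
    EscapedVariableLengthDecls.insert(VD); // needs a runtime-sized allocation
  else
    EscapedDecls.insert(VD); // fixed-size value: moved into the record
}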
@@ -602,9 +605,12 @@ static const Stmt *getSingleCompoundChild(const Stmt *Body) {
 }
 
 /// Check if the parallel directive has an 'if' clause with non-constant or
-/// false condition.
-static bool hasParallelIfClause(ASTContext &Ctx,
-                                const OMPExecutableDirective &D) {
+/// false condition. Also, check if the number of threads is strictly specified
+/// and run those directives in non-SPMD mode.
+static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
+                                          const OMPExecutableDirective &D) {
+  if (D.hasClausesOfKind<OMPNumThreadsClause>())
+    return true;
   for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
     OpenMPDirectiveKind NameModifier = C->getNameModifier();
     if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
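The widened predicate changes which user code stays in SPMD mode. A hypothetical illustration, mirroring the test updates further down (where num_threads(M) is dropped to keep those kernels in SPMD mode):

void saxpy(int n, float a, float *x, float *y) {
  // No if/num_threads on the parallel part: SPMD codegen remains possible.
#pragma omp target teams distribute parallel for
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];

  // A strictly specified thread count now forces generic (non-SPMD)
  // codegen so that __kmpc_push_num_threads can honor it.
#pragma omp target teams distribute parallel for num_threads(128)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}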
@@ -629,7 +635,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
   switch (D.getDirectiveKind()) {
   case OMPD_target:
     if (isOpenMPParallelDirective(DKind) &&
-        !hasParallelIfClause(Ctx, *NestedDir))
+        !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
       return true;
     if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
       Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -639,7 +645,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
       if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
         DKind = NND->getDirectiveKind();
         if (isOpenMPParallelDirective(DKind) &&
-            !hasParallelIfClause(Ctx, *NND))
+            !hasParallelIfNumThreadsClause(Ctx, *NND))
           return true;
         if (DKind == OMPD_distribute) {
           Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -651,7 +657,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
           if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
             DKind = NND->getDirectiveKind();
             return isOpenMPParallelDirective(DKind) &&
-                   !hasParallelIfClause(Ctx, *NND);
+                   !hasParallelIfNumThreadsClause(Ctx, *NND);
           }
         }
       }
@@ -659,7 +665,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
     return false;
   case OMPD_target_teams:
     if (isOpenMPParallelDirective(DKind) &&
-        !hasParallelIfClause(Ctx, *NestedDir))
+        !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
       return true;
     if (DKind == OMPD_distribute) {
       Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
@@ -669,13 +675,13 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
       if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
         DKind = NND->getDirectiveKind();
         return isOpenMPParallelDirective(DKind) &&
-               !hasParallelIfClause(Ctx, *NND);
+               !hasParallelIfNumThreadsClause(Ctx, *NND);
       }
     }
     return false;
   case OMPD_target_teams_distribute:
     return isOpenMPParallelDirective(DKind) &&
-           !hasParallelIfClause(Ctx, *NestedDir);
+           !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
   case OMPD_target_simd:
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
@@ -746,7 +752,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
   case OMPD_target_parallel_for_simd:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd:
-    return !hasParallelIfClause(Ctx, D);
+    return !hasParallelIfNumThreadsClause(Ctx, D);
   case OMPD_target_simd:
   case OMPD_target_teams_distribute_simd:
    return false;
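For these combined constructs the predicate is evaluated on the directive itself, so an 'if' clause behaves as the doc comment describes: a non-constant or false condition demotes the region to generic mode, while a provably true one does not. A hypothetical illustration:

void run(int n, bool cond, double *d) {
#pragma omp target parallel if(parallel: cond) // non-constant: generic mode
  { d[0] = n; }
#pragma omp target parallel if(parallel: true) // constant true: SPMD possible
  { d[1] = n; }
}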
@@ -967,7 +973,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
   CGF.EmitBlock(ExecuteBB);
 
   IsInTargetMasterThreadRegion = true;
-  emitGenericVarsProlog(CGF, D.getLocStart());
 }
 
 void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
@@ -976,8 +981,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
   if (!CGF.HaveInsertPoint())
     return;
 
-  emitGenericVarsEpilog(CGF);
-
   if (!EST.ExitBB)
     EST.ExitBB = CGF.createBasicBlock(".exit");
 
@@ -1464,8 +1467,7 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                               OpenMPProcBindClauseKind ProcBind,
                                               SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      IsInTargetMasterThreadRegion)
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
   CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1475,8 +1477,7 @@ void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                 llvm::Value *NumThreads,
                                                 SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
-      IsInTargetMasterThreadRegion)
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
   CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1887,8 +1888,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
   // Just call the outlined function to execute the parallel region.
   // OutlinedFn(&GTid, &zero, CapturedStruct);
   //
-  // TODO: Do something with IfCond when support for the 'if' clause
-  // is added on Spmd target directives.
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
 
   Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
@@ -18,7 +18,7 @@
 
 __thread int id;
 
-int baz(int f);
+int baz(int f, double &a);
 
 template<typename tx, typename ty>
 struct TT{
@@ -345,7 +345,7 @@ struct S1 {
     {
      this->a = (double)b + 1.5;
      c[1][1] = ++a;
-     baz(a);
+     baz(a, a);
    }
 
    return c[1][1] + (int)b;
@@ -367,9 +367,9 @@ int bar(int n){
   return a;
 }
 
-int baz(int f) {
+int baz(int f, double &a) {
 #pragma omp parallel
-  f = 2;
+  f = 2 + a;
   return f;
 }
 
@@ -551,7 +551,7 @@ int baz(int f) {
 // CHECK: [[EXIT]]
 // CHECK: ret void
 
-// CHECK: define i32 [[BAZ]](i32 [[F:%.*]])
+// CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
 // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
 // CHECK: [[GTID_ADDR:%.+]] = alloca i32,
@@ -559,13 +559,13 @@ int baz(int f) {
 // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
 // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
 // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
-// CHECK: store i32 [[F]], i32* [[F_PTR]],
+// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
 // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]],
 // CHECK: icmp eq i32
 // CHECK: br i1
 
 // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
-// CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 1)
+// CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2)
 // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]],
 // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0
 // CHECK: [[F_REF:%.+]] = bitcast i32* [[F_PTR]] to i8*
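Read together, these checks pin down the new sharing layout for baz: only the by-value parameter f is pushed onto the data-sharing stack (hence the 4-byte allocation), while the reference parameter a is forwarded as a raw pointer, giving two shared variables in total. Conceptually (a sketch, not compiler output):

// Hypothetical mirror of the globalized record implied by the checks above.
struct _globalized_locals_ty_sketch {
  int f; // the only value-typed escaping variable in baz
};
// __kmpc_begin_sharing_variables(..., 2) then publishes two pointers:
//   slot 0: &record->f (i32*); slot 1: &a (double*, passed through as-is)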
@@ -580,11 +580,11 @@ int baz(int f) {
 // CHECK: br i1
 
 // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
-// CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]])
+// CHECK: call void [[OUTLINED:@.+]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
 // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]])
 // CHECK: br label
 
-// CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]])
+// CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
 // CHECK: br label
 
 // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
@@ -55,6 +55,7 @@ int bar(int n){
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
 // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack
 // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
 // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
 // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
@@ -8,9 +8,9 @@
 #ifndef HEADER
 #define HEADER
 
-// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 0
+// Check that the execution mode of all 2 target regions on the gpu is set to non-SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
 
 template<typename tx>
 tx ftemplate(int n) {
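The *_exec_mode weak constants record the scheme chosen per target region. As a hypothetical restatement of the encoding these CHECK lines rely on (the real enum lives in CGOpenMPRuntimeNVPTX):

enum ExecutionModeSketch : char {
  SpmdMode = 0,    // <kernel>_exec_mode = weak constant i8 0
  GenericMode = 1, // <kernel>_exec_mode = weak constant i8 1
};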
@@ -46,23 +46,13 @@ int bar(int n){
 // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
 // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
 // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: br label {{%?}}[[EXEC:.+]]
-//
-// CHECK: [[EXEC]]
-// CHECK-NOT: call void @__kmpc_push_num_threads
-// CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
-// CHECK: br label {{%?}}[[DONE:.+]]
-//
-// CHECK: [[DONE]]
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
-// CHECK: br label {{%?}}[[EXIT:.+]]
-//
-// CHECK: [[EXIT]]
+// CHECK: call void @__kmpc_kernel_init(i32
+// CHECK: call void @__kmpc_push_num_threads
+// CHECK: call void @__kmpc_kernel_deinit(i16 1)
 // CHECK: ret void
 // CHECK: }
 
-// CHECK: define internal void [[OP1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i16* {{[^%]*}}[[ARG:%.+]])
+// CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* {{[^%]*}}[[ARG:%.+]])
 // CHECK: = alloca i32*, align
 // CHECK: = alloca i32*, align
 // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align
@@ -89,23 +79,13 @@ int bar(int n){
 // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
 // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
 // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: br label {{%?}}[[EXEC:.+]]
-//
-// CHECK: [[EXEC]]
-// CHECK-NOT: call void @__kmpc_push_num_threads
-// CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
-// CHECK: br label {{%?}}[[DONE:.+]]
-//
-// CHECK: [[DONE]]
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
-// CHECK: br label {{%?}}[[EXIT:.+]]
-//
-// CHECK: [[EXIT]]
+// CHECK: call void @__kmpc_kernel_init(i32
+// CHECK: call void @__kmpc_push_num_threads
+// CHECK: call void @__kmpc_kernel_deinit(i16 1)
 // CHECK: ret void
 // CHECK: }
 
-// CHECK: define internal void [[OP2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]])
+// CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]])
 // CHECK: = alloca i32*, align
 // CHECK: = alloca i32*, align
 // CHECK: [[A_ADDR:%.+]] = alloca i32*, align
@@ -45,7 +45,7 @@ tx ftemplate(int n) {
     b[i] += 1;
   }
 
-#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M)
+#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k)
   for(int i = 0; i < M; i++) {
     for(int j = 0; j < M; j++) {
       k = M;
@@ -43,7 +43,7 @@ tx ftemplate(int n) {
     b[i] += 1;
   }
 
-#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M)
+#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k)
   for(int i = 0; i < M; i++) {
     for(int j = 0; j < M; j++) {
       k = M;