[OpenMP] Introduce new flags to assert thread and team usage in the runtime
This patch adds two flags to be supported for the new runtime. The flags are `-fopenmp-assume-threads-oversubscription` and -fopenmp-assume-teams-oversubscription`. These add global values that can be checked by the work sharing runtime functions to make better judgements about how to distribute work between the threads. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D111348
This commit is contained in:
parent
af4599b8ab
commit
9efdca87c7
|
@ -244,6 +244,8 @@ LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records i
|
|||
LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
|
||||
LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL")
|
||||
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
|
||||
LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
|
||||
LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
|
||||
LANGOPT(RenderScript , 1, 0, "RenderScript")
|
||||
|
||||
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
|
||||
|
|
|
@ -2427,6 +2427,14 @@ def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>,
|
|||
HelpText<"Enable debugging in the OpenMP offloading device RTL">;
|
||||
def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>;
|
||||
def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">,
|
||||
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">,
|
||||
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">,
|
||||
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
|
||||
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
|
||||
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
|
||||
LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
|
||||
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
|
||||
|
|
|
@ -1200,8 +1200,14 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
|
|||
llvm_unreachable("OpenMP NVPTX can only handle device code.");
|
||||
|
||||
llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
|
||||
if (CGM.getLangOpts().OpenMPTargetNewRuntime)
|
||||
OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug);
|
||||
if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
|
||||
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
|
||||
"__omp_rtl_debug_kind");
|
||||
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
|
||||
"__omp_rtl_assume_teams_oversubscription");
|
||||
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
|
||||
"__omp_rtl_assume_threads_oversubscription");
|
||||
}
|
||||
}
|
||||
|
||||
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
|
||||
|
|
|
@ -5815,6 +5815,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
options::OPT_fno_openmp_cuda_force_full_runtime,
|
||||
/*Default=*/false))
|
||||
CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
|
||||
|
||||
// When in OpenMP offloading mode, forward assumptions information about
|
||||
// thread and team counts in the device.
|
||||
if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription,
|
||||
options::OPT_fno_openmp_assume_teams_oversubscription,
|
||||
/*Default=*/false))
|
||||
CmdArgs.push_back("-fopenmp-assume-teams-oversubscription");
|
||||
if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription,
|
||||
options::OPT_fno_openmp_assume_threads_oversubscription,
|
||||
/*Default=*/false))
|
||||
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
|
||||
break;
|
||||
default:
|
||||
// By default, if Clang doesn't know how to generate useful OpenMP code
|
||||
|
|
|
@ -3486,6 +3486,12 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
|
|||
if (Opts.OpenMPTargetNewRuntime)
|
||||
GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA);
|
||||
|
||||
if (Opts.OpenMPThreadSubscription)
|
||||
GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA);
|
||||
|
||||
if (Opts.OpenMPTeamSubscription)
|
||||
GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA);
|
||||
|
||||
if (Opts.OpenMPTargetDebug != 0)
|
||||
GenerateArg(Args, OPT_fopenmp_target_debug_EQ,
|
||||
Twine(Opts.OpenMPTargetDebug), SA);
|
||||
|
@ -3928,6 +3934,13 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
|
|||
}
|
||||
}
|
||||
|
||||
if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) {
|
||||
if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription))
|
||||
Opts.OpenMPTeamSubscription = true;
|
||||
if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription))
|
||||
Opts.OpenMPThreadSubscription = true;
|
||||
}
|
||||
|
||||
// Get the OpenMP target triples if any.
|
||||
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
|
||||
enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit };
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_debug_kind"
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
//.
|
||||
// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1
|
||||
//.
|
||||
// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111
|
||||
//.
|
||||
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0
|
||||
//.
|
||||
void foo() {
|
||||
#pragma omp target
|
||||
{ }
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,40 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_"
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
//.
|
||||
// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1
|
||||
// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
|
||||
// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
|
||||
//.
|
||||
// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111
|
||||
// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
|
||||
// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
|
||||
//.
|
||||
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0
|
||||
// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
|
||||
// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
|
||||
//.
|
||||
// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr constant i32 0
|
||||
// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
|
||||
// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 1
|
||||
//.
|
||||
// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr constant i32 0
|
||||
// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 1
|
||||
// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
|
||||
//.
|
||||
void foo() {
|
||||
#pragma omp target
|
||||
{ }
|
||||
}
|
||||
|
||||
#endif
|
|
@ -683,9 +683,8 @@ public:
|
|||
omp::IdentFlag Flags = omp::IdentFlag(0),
|
||||
unsigned Reserve2Flags = 0);
|
||||
|
||||
/// Create a global value containing the \p DebugLevel to control debuggin in
|
||||
/// the module.
|
||||
GlobalValue *createDebugKind(unsigned DebugLevel);
|
||||
/// Create a global flag \p Namein the module with initial value \p Value.
|
||||
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
|
||||
|
||||
/// Generate control flow and cleanup for cancellation.
|
||||
///
|
||||
|
|
|
@ -245,12 +245,12 @@ OpenMPIRBuilder::~OpenMPIRBuilder() {
|
|||
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
|
||||
}
|
||||
|
||||
GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) {
|
||||
GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
|
||||
IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
|
||||
auto *GV = new GlobalVariable(
|
||||
M, I32Ty,
|
||||
/* isConstant = */ true, GlobalValue::WeakODRLinkage,
|
||||
ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind");
|
||||
auto *GV =
|
||||
new GlobalVariable(M, I32Ty,
|
||||
/* isConstant = */ true, GlobalValue::WeakODRLinkage,
|
||||
ConstantInt::get(I32Ty, Value), Name);
|
||||
|
||||
return GV;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue