[OpenMP] Introduce new flags to assert thread and team usage in the runtime

This patch adds two flags to be supported for the new runtime. The flags
are `-fopenmp-assume-threads-oversubscription` and
-fopenmp-assume-teams-oversubscription`. These add global values that
can be checked by the work sharing runtime functions to make better
judgements about how to distribute work between the threads.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D111348
This commit is contained in:
Joseph Huber 2021-10-07 15:43:56 -04:00
parent af4599b8ab
commit 9efdca87c7
9 changed files with 89 additions and 34 deletions

View File

@ -244,6 +244,8 @@ LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records i
LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL")
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
LANGOPT(RenderScript , 1, 0, "RenderScript")
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")

View File

@ -2427,6 +2427,14 @@ def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>,
HelpText<"Enable debugging in the OpenMP offloading device RTL">;
def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>;
def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">,
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">,
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">,
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,

View File

@ -1200,8 +1200,14 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
if (CGM.getLangOpts().OpenMPTargetNewRuntime)
OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug);
if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
"__omp_rtl_debug_kind");
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
"__omp_rtl_assume_teams_oversubscription");
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
"__omp_rtl_assume_threads_oversubscription");
}
}
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,

View File

@ -5815,6 +5815,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_openmp_cuda_force_full_runtime,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
// When in OpenMP offloading mode, forward assumptions information about
// thread and team counts in the device.
if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription,
options::OPT_fno_openmp_assume_teams_oversubscription,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-assume-teams-oversubscription");
if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription,
options::OPT_fno_openmp_assume_threads_oversubscription,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code

View File

@ -3486,6 +3486,12 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
if (Opts.OpenMPTargetNewRuntime)
GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA);
if (Opts.OpenMPThreadSubscription)
GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA);
if (Opts.OpenMPTeamSubscription)
GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA);
if (Opts.OpenMPTargetDebug != 0)
GenerateArg(Args, OPT_fopenmp_target_debug_EQ,
Twine(Opts.OpenMPTargetDebug), SA);
@ -3928,6 +3934,13 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
}
}
if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) {
if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription))
Opts.OpenMPTeamSubscription = true;
if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription))
Opts.OpenMPThreadSubscription = true;
}
// Get the OpenMP target triples if any.
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit };

View File

@ -1,24 +0,0 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_debug_kind"
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
//.
// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1
//.
// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111
//.
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0
//.
void foo() {
#pragma omp target
{ }
}
#endif

View File

@ -0,0 +1,40 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_"
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
//.
// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1
// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
//.
// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111
// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
//.
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0
// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
//.
// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr constant i32 0
// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0
// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 1
//.
// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr constant i32 0
// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 1
// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0
//.
void foo() {
#pragma omp target
{ }
}
#endif

View File

@ -683,9 +683,8 @@ public:
omp::IdentFlag Flags = omp::IdentFlag(0),
unsigned Reserve2Flags = 0);
/// Create a global value containing the \p DebugLevel to control debuggin in
/// the module.
GlobalValue *createDebugKind(unsigned DebugLevel);
/// Create a global flag \p Namein the module with initial value \p Value.
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
/// Generate control flow and cleanup for cancellation.
///

View File

@ -245,12 +245,12 @@ OpenMPIRBuilder::~OpenMPIRBuilder() {
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) {
GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
auto *GV = new GlobalVariable(
M, I32Ty,
/* isConstant = */ true, GlobalValue::WeakODRLinkage,
ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind");
auto *GV =
new GlobalVariable(M, I32Ty,
/* isConstant = */ true, GlobalValue::WeakODRLinkage,
ConstantInt::get(I32Ty, Value), Name);
return GV;
}