[CMake][OpenMP] Customize default offloading arch

For the shuffle instructions in reductions we need at least sm_30
but the user may want to customize the default architecture.

Differential Revision: https://reviews.llvm.org/D38883

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@315996 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jonas Hahnfeld 2017-10-17 13:37:36 +00:00
parent e10677d71b
commit 19e976a471
4 changed files with 19 additions and 21 deletions

View File

@ -235,6 +235,17 @@ endif()
set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
"Default OpenMP runtime used by -fopenmp.")
# OpenMP offloading requires at least sm_30 because we use shuffle instructions
# to generate efficient code for reductions.
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
  "Default architecture for OpenMP offloading to Nvidia GPUs.")
# Validate the (possibly user-supplied) value: it must look like sm_<number>
# and the number must be at least 30.
string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
# string(REGEX MATCH) always sets MATCHED_ARCH (to an empty string when the
# pattern does not match), so test its value rather than whether it is DEFINED;
# otherwise malformed values such as "foo" would never be reset.
if (NOT MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
  # FORCE is required to overwrite a value already stored in the cache.
  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
    "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
endif()
set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
"Vendor-specific text for showing with version information.")

View File

@ -20,6 +20,9 @@
/* Default OpenMP runtime used by -fopenmp. */
#define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
/* Default architecture for OpenMP offloading to Nvidia GPUs. The value is
 * substituted from the CLANG_OPENMP_NVPTX_DEFAULT_ARCH CMake variable when
 * this header is configured. */
#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
/* Multilib suffix for libdir. */
#define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"

View File

@ -542,9 +542,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
// flags are not duplicated.
// Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
for (Arg *A : Args){
for (Arg *A : Args) {
bool IsDuplicate = false;
for (Arg *DALArg : *DAL){
for (Arg *DALArg : *DAL) {
if (A == DALArg) {
IsDuplicate = true;
break;
@ -555,14 +555,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
}
StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
if (Arch.empty()) {
// Default compute capability for CUDA toolchain is the
// lowest compute capability supported by the installed
// CUDA version.
DAL->AddJoinedArg(nullptr,
Opts.getOption(options::OPT_march_EQ),
CudaInstallation.getLowestExistingArch());
}
if (Arch.empty())
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
return DAL;
}

View File

@ -76,17 +76,6 @@ public:
/// \brief Get the entry stored in LibDeviceMap for the given GPU name.
/// \param Gpu key to look up in LibDeviceMap.
/// \returns the result of LibDeviceMap.lookup(Gpu), converted to std::string.
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
/// \brief Get lowest available compute capability
/// for which a libdevice library exists.
std::string getLowestExistingArch() const {
std::string LibDeviceFile;
for (auto key : LibDeviceMap.keys()) {
LibDeviceFile = LibDeviceMap.lookup(key);
if (!LibDeviceFile.empty())
return key;
}
return "sm_20";
}
};
namespace tools {