mirror of https://github.com/microsoft/clang.git
[CMake][OpenMP] Customize default offloading arch
For the shuffle instructions in reductions we need at least sm_30 but the user may want to customize the default architecture. Differential Revision: https://reviews.llvm.org/D38883 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@315996 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e10677d71b
commit
19e976a471
|
@ -235,6 +235,17 @@ endif()
|
|||
set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
|
||||
"Default OpenMP runtime used by -fopenmp.")
|
||||
|
||||
# OpenMP offloading requires at least sm_30 because we use shuffle instructions
# to generate efficient code for reductions.
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
  "Default architecture for OpenMP offloading to Nvidia GPUs.")

# Validate the (possibly user-supplied) cache value: it must look like sm_<N>
# with N >= 30. string(REGEX MATCH) always defines its output variable and
# leaves it empty when nothing matches, so test the value for emptiness rather
# than using DEFINED; otherwise a malformed value would never be reset.
string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
if (NOT MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
  # FORCE is needed to overwrite the value that is already in the cache.
  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
    "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
endif()
|
||||
|
||||
set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
|
||||
"Vendor-specific text for showing with version information.")
|
||||
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
/* Default OpenMP runtime used by -fopenmp. */
|
||||
#define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
|
||||
|
||||
/* Default architecture for OpenMP offloading to Nvidia GPUs. */
|
||||
#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
|
||||
|
||||
/* Multilib suffix for libdir. */
|
||||
#define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
|
||||
|
||||
|
|
|
@ -555,14 +555,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
|
|||
}
|
||||
|
||||
StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
|
||||
if (Arch.empty()) {
|
||||
// Default compute capability for CUDA toolchain is the
|
||||
// lowest compute capability supported by the installed
|
||||
// CUDA version.
|
||||
DAL->AddJoinedArg(nullptr,
|
||||
Opts.getOption(options::OPT_march_EQ),
|
||||
CudaInstallation.getLowestExistingArch());
|
||||
}
|
||||
if (Arch.empty())
|
||||
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
|
||||
CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
|
||||
|
||||
return DAL;
|
||||
}
|
||||
|
|
|
@ -76,17 +76,6 @@ public:
|
|||
std::string getLibDeviceFile(StringRef Gpu) const {
|
||||
return LibDeviceMap.lookup(Gpu);
|
||||
}
|
||||
/// \brief Get lowest available compute capability
|
||||
/// for which a libdevice library exists.
|
||||
std::string getLowestExistingArch() const {
|
||||
std::string LibDeviceFile;
|
||||
for (auto key : LibDeviceMap.keys()) {
|
||||
LibDeviceFile = LibDeviceMap.lookup(key);
|
||||
if (!LibDeviceFile.empty())
|
||||
return key;
|
||||
}
|
||||
return "sm_20";
|
||||
}
|
||||
};
|
||||
|
||||
namespace tools {
|
||||
|
|
Loading…
Reference in New Issue