mirror of https://github.com/microsoft/clang.git
[CUDA] Driver changes to support CUDA compilation on MacOS.
Summary: Compiling CUDA device code requires us to know the host toolchain, because CUDA device-side compiles pull in, e.g., host headers. When we only supported Linux compilation, this worked because CudaToolChain, which is responsible for device-side CUDA compilation, inherited from the Linux toolchain. But in order to support MacOS, CudaToolChain needs to take a HostToolChain pointer. Because a CUDA toolchain now requires a host TC, we will no longer create a CUDA toolchain from Driver::getToolChain -- you have to go through CreateOffloadingDeviceToolChains. I am *pretty* sure this is correct, and that previously any attempt to create a CUDA toolchain through getToolChain() would eventually have resulted in us throwing "error: unsupported use of NVPTX for host compilation". In any case, hacking getToolChain to create a CUDA+host toolchain would be wrong, because a Driver can be reused for multiple compilations, potentially with different host TCs, and getToolChain will cache the result, causing us to potentially use a stale host TC. So that's the main change in this patch. In addition, we have to pull CudaInstallationDetector out of Generic_GCC and into a top-level class. It's now used by the Generic_GCC and MachO toolchains. Reviewers: tra Subscribers: rryan, hfinkel, sfantao Differential Revision: https://reviews.llvm.org/D26774 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@287285 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c2d61705d6
commit
29df057cd3
|
@ -38,6 +38,7 @@ class FileSystem;
|
|||
|
||||
namespace driver {
|
||||
class Compilation;
|
||||
class CudaInstallationDetector;
|
||||
class Driver;
|
||||
class JobAction;
|
||||
class RegisterEffectiveTriple;
|
||||
|
|
|
@ -473,14 +473,18 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
|
|||
if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
|
||||
return types::isCuda(I.first);
|
||||
})) {
|
||||
const ToolChain &TC = getToolChain(
|
||||
C.getInputArgs(),
|
||||
llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>()
|
||||
->getTriple()
|
||||
.isArch64Bit()
|
||||
? "nvptx64-nvidia-cuda"
|
||||
: "nvptx-nvidia-cuda"));
|
||||
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
|
||||
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
|
||||
const llvm::Triple &HostTriple = HostTC->getTriple();
|
||||
llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda"
|
||||
: "nvptx-nvidia-cuda");
|
||||
// Use the CUDA and host triples as the key into the ToolChains map, because
|
||||
// the device toolchain we create depends on both.
|
||||
ToolChain *&CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
|
||||
if (!CudaTC) {
|
||||
CudaTC = new toolchains::CudaToolChain(*this, CudaTriple, *HostTC,
|
||||
C.getInputArgs());
|
||||
}
|
||||
C.addOffloadDeviceToolChain(CudaTC, Action::OFK_Cuda);
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -3717,9 +3721,6 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
|
|||
break;
|
||||
}
|
||||
break;
|
||||
case llvm::Triple::CUDA:
|
||||
TC = new toolchains::CudaToolChain(*this, Target, Args);
|
||||
break;
|
||||
case llvm::Triple::PS4:
|
||||
TC = new toolchains::PS4CPU(*this, Target, Args);
|
||||
break;
|
||||
|
@ -3761,6 +3762,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
|
||||
// compiles always need two toolchains, the CUDA toolchain and the host
|
||||
// toolchain. So the only valid way to create a CUDA toolchain is via
|
||||
// CreateOffloadingDeviceToolChains.
|
||||
|
||||
return *TC;
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,8 @@ MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
|
|||
|
||||
/// Darwin - Darwin tool chain for i386 and x86_64.
|
||||
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
|
||||
: MachO(D, Triple, Args), TargetInitialized(false) {}
|
||||
: MachO(D, Triple, Args), TargetInitialized(false),
|
||||
CudaInstallation(D, Triple, Args) {}
|
||||
|
||||
types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
|
||||
types::ID Ty = types::lookupTypeForExtension(Ext);
|
||||
|
@ -99,6 +100,11 @@ bool Darwin::hasBlocksRuntime() const {
|
|||
}
|
||||
}
|
||||
|
||||
/// Append the CUDA include arguments to CC1Args by forwarding to this
/// toolchain's CudaInstallation detector.
void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs,
|
||||
ArgStringList &CC1Args) const {
|
||||
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
|
||||
}
|
||||
|
||||
// This is just a MachO name translation routine and there's no
|
||||
// way to join this into ARMTargetParser without breaking all
|
||||
// other assumptions. Maybe MachO should consider standardising
|
||||
|
@ -1296,6 +1302,10 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
|
|||
return Res;
|
||||
}
|
||||
|
||||
/// Print verbose toolchain information to OS; for Darwin this is whatever the
/// CUDA installation detector reports about itself.
void Darwin::printVerboseInfo(raw_ostream &OS) const {
|
||||
CudaInstallation.print(OS);
|
||||
}
|
||||
|
||||
/// Generic_GCC - A tool chain using the 'gcc' command to perform
|
||||
/// all subcommands; this relies on gcc translating the majority of
|
||||
/// command line options.
|
||||
|
@ -1811,10 +1821,10 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
|
|||
return CudaVersion::UNKNOWN;
|
||||
}
|
||||
|
||||
// \brief -- try common CUDA installation paths looking for files we need for
|
||||
// CUDA compilation.
|
||||
void Generic_GCC::CudaInstallationDetector::init(
|
||||
const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args) {
|
||||
CudaInstallationDetector::CudaInstallationDetector(
|
||||
const Driver &D, const llvm::Triple &TargetTriple,
|
||||
const llvm::opt::ArgList &Args)
|
||||
: D(D) {
|
||||
SmallVector<std::string, 4> CudaPathCandidates;
|
||||
|
||||
if (Args.hasArg(options::OPT_cuda_path_EQ))
|
||||
|
@ -1835,13 +1845,25 @@ void Generic_GCC::CudaInstallationDetector::init(
|
|||
BinPath = CudaPath + "/bin";
|
||||
IncludePath = InstallPath + "/include";
|
||||
LibDevicePath = InstallPath + "/nvvm/libdevice";
|
||||
LibPath = InstallPath + (TargetTriple.isArch64Bit() ? "/lib64" : "/lib");
|
||||
|
||||
auto &FS = D.getVFS();
|
||||
if (!(FS.exists(IncludePath) && FS.exists(BinPath) && FS.exists(LibPath) &&
|
||||
if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
|
||||
FS.exists(LibDevicePath)))
|
||||
continue;
|
||||
|
||||
// On Linux, we have both lib and lib64 directories, and we need to choose
|
||||
// based on our triple. On MacOS, we have only a lib directory.
|
||||
//
|
||||
// It's sufficient for our purposes to be flexible: If both lib and lib64
|
||||
// exist, we choose whichever one matches our triple. Otherwise, if only
|
||||
// lib exists, we use it.
|
||||
if (TargetTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
|
||||
LibPath = InstallPath + "/lib64";
|
||||
else if (FS.exists(InstallPath + "/lib"))
|
||||
LibPath = InstallPath + "/lib";
|
||||
else
|
||||
continue;
|
||||
|
||||
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
|
||||
FS.getBufferForFile(InstallPath + "/version.txt");
|
||||
if (!VersionFile) {
|
||||
|
@ -1898,7 +1920,33 @@ void Generic_GCC::CudaInstallationDetector::init(
|
|||
}
|
||||
}
|
||||
|
||||
void Generic_GCC::CudaInstallationDetector::CheckCudaVersionSupportsArch(
|
||||
/// Append the cc1 arguments that make CUDA headers visible.
///
/// Unless OPT_nobuiltininc is given, the resource directory's
/// include/cuda_wrappers is added as an -internal-isystem path. Unless
/// OPT_nocudainc is given, the detected installation's include path is added
/// and __clang_cuda_runtime_wrapper.h is force-included; if no valid
/// installation was detected, err_drv_no_cuda_installation is diagnosed
/// instead.
///
/// \param DriverArgs driver arguments consulted for the opt-out flags and
///        used to allocate the argument strings.
/// \param CC1Args argument list that the new cc1 arguments are appended to.
void CudaInstallationDetector::AddCudaIncludeArgs(
|
||||
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
|
||||
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
|
||||
// Add cuda_wrappers/* to our system include path. This lets us wrap
|
||||
// standard library headers.
|
||||
SmallString<128> P(D.ResourceDir);
|
||||
llvm::sys::path::append(P, "include");
|
||||
llvm::sys::path::append(P, "cuda_wrappers");
|
||||
CC1Args.push_back("-internal-isystem");
|
||||
CC1Args.push_back(DriverArgs.MakeArgString(P));
|
||||
}
|
||||
|
||||
// The user asked us not to add the CUDA installation's headers; the wrapper
// directory above is the only thing that may have been added.
if (DriverArgs.hasArg(options::OPT_nocudainc))
|
||||
return;
|
||||
|
||||
// Without a detected installation there is no include path to add, so
// diagnose and stop.
if (!isValid()) {
|
||||
D.Diag(diag::err_drv_no_cuda_installation);
|
||||
return;
|
||||
}
|
||||
|
||||
// Add the installation's include directory and force-include the runtime
// wrapper header.
CC1Args.push_back("-internal-isystem");
|
||||
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
|
||||
CC1Args.push_back("-include");
|
||||
CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
|
||||
}
|
||||
|
||||
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
|
||||
CudaArch Arch) const {
|
||||
if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
|
||||
ArchsWithVersionTooLowErrors.count(Arch) > 0)
|
||||
|
@ -1913,7 +1961,7 @@ void Generic_GCC::CudaInstallationDetector::CheckCudaVersionSupportsArch(
|
|||
}
|
||||
}
|
||||
|
||||
void Generic_GCC::CudaInstallationDetector::print(raw_ostream &OS) const {
|
||||
void CudaInstallationDetector::print(raw_ostream &OS) const {
|
||||
if (isValid())
|
||||
OS << "Found CUDA installation: " << InstallPath << ", version "
|
||||
<< CudaVersionToString(Version) << "\n";
|
||||
|
@ -2756,7 +2804,8 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
|
|||
|
||||
Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple,
|
||||
const ArgList &Args)
|
||||
: ToolChain(D, Triple, Args), GCCInstallation(D), CudaInstallation(D) {
|
||||
: ToolChain(D, Triple, Args), GCCInstallation(D),
|
||||
CudaInstallation(D, Triple, Args) {
|
||||
getProgramPaths().push_back(getDriver().getInstalledDir());
|
||||
if (getDriver().getInstalledDir() != getDriver().Dir)
|
||||
getProgramPaths().push_back(getDriver().Dir);
|
||||
|
@ -4162,7 +4211,6 @@ static void addMultilibsFilePaths(const Driver &D, const MultilibSet &Multilibs,
|
|||
Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
|
||||
: Generic_ELF(D, Triple, Args) {
|
||||
GCCInstallation.init(Triple, Args);
|
||||
CudaInstallation.init(Triple, Args);
|
||||
Multilibs = GCCInstallation.getMultilibs();
|
||||
llvm::Triple::ArchType Arch = Triple.getArch();
|
||||
std::string SysRoot = computeSysRoot();
|
||||
|
@ -4767,26 +4815,7 @@ void Linux::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
|
|||
|
||||
void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs,
|
||||
ArgStringList &CC1Args) const {
|
||||
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
|
||||
// Add cuda_wrappers/* to our system include path. This lets us wrap
|
||||
// standard library headers.
|
||||
SmallString<128> P(getDriver().ResourceDir);
|
||||
llvm::sys::path::append(P, "include");
|
||||
llvm::sys::path::append(P, "cuda_wrappers");
|
||||
addSystemInclude(DriverArgs, CC1Args, P);
|
||||
}
|
||||
|
||||
if (DriverArgs.hasArg(options::OPT_nocudainc))
|
||||
return;
|
||||
|
||||
if (!CudaInstallation.isValid()) {
|
||||
getDriver().Diag(diag::err_drv_no_cuda_installation);
|
||||
return;
|
||||
}
|
||||
|
||||
addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath());
|
||||
CC1Args.push_back("-include");
|
||||
CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
|
||||
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
|
||||
}
|
||||
|
||||
void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
|
||||
|
@ -4968,16 +4997,18 @@ Tool *DragonFly::buildLinker() const {
|
|||
/// together object files from the assembler into a single blob.
|
||||
|
||||
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
|
||||
const ArgList &Args)
|
||||
: Linux(D, Triple, Args) {
|
||||
const ToolChain &HostTC, const ArgList &Args)
|
||||
: ToolChain(D, Triple, Args), HostTC(HostTC),
|
||||
CudaInstallation(D, Triple, Args) {
|
||||
if (CudaInstallation.isValid())
|
||||
getProgramPaths().push_back(CudaInstallation.getBinPath());
|
||||
}
|
||||
|
||||
void
|
||||
CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const {
|
||||
Linux::addClangTargetOptions(DriverArgs, CC1Args);
|
||||
void CudaToolChain::addClangTargetOptions(
|
||||
const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const {
|
||||
HostTC.addClangTargetOptions(DriverArgs, CC1Args);
|
||||
|
||||
CC1Args.push_back("-fcuda-is-device");
|
||||
|
||||
if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
|
||||
|
@ -5019,13 +5050,18 @@ void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
|
|||
assert(!Arch.empty() && "Must have an explicit GPU arch.");
|
||||
CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
|
||||
}
|
||||
Linux::AddCudaIncludeArgs(DriverArgs, CC1Args);
|
||||
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
|
||||
}
|
||||
|
||||
llvm::opt::DerivedArgList *
|
||||
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
|
||||
StringRef BoundArch, Action::OffloadKind) const {
|
||||
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
|
||||
StringRef BoundArch,
|
||||
Action::OffloadKind DeviceOffloadKind) const {
|
||||
DerivedArgList *DAL =
|
||||
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
|
||||
if (!DAL)
|
||||
DAL = new DerivedArgList(Args.getBaseArgs());
|
||||
|
||||
const OptTable &Opts = getDriver().getOpts();
|
||||
|
||||
for (Arg *A : Args) {
|
||||
|
@ -5077,6 +5113,30 @@ Tool *CudaToolChain::buildLinker() const {
|
|||
return new tools::NVPTX::Linker(*this);
|
||||
}
|
||||
|
||||
/// Use the host toolchain's warning options for the device-side compile by
/// forwarding to HostTC.
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
|
||||
HostTC.addClangWarningOptions(CC1Args);
|
||||
}
|
||||
|
||||
/// Report the C++ standard library type chosen by the host toolchain, so the
/// device-side compile answers exactly as HostTC does.
ToolChain::CXXStdlibType
|
||||
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
|
||||
return HostTC.GetCXXStdlibType(Args);
|
||||
}
|
||||
|
||||
/// Add the host toolchain's system include arguments to CC1Args; the CUDA
/// toolchain contributes none of its own here and simply forwards to HostTC.
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
|
||||
ArgStringList &CC1Args) const {
|
||||
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
|
||||
}
|
||||
|
||||
/// Add the host toolchain's C++ standard library include arguments to CC1Args
/// by forwarding to HostTC.
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
|
||||
ArgStringList &CC1Args) const {
|
||||
HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
|
||||
}
|
||||
|
||||
/// Forward the IAMCU include-argument hook to the host toolchain.
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
|
||||
ArgStringList &CC1Args) const {
|
||||
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
|
||||
}
|
||||
|
||||
/// XCore tool chain
|
||||
XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple,
|
||||
const ArgList &Args)
|
||||
|
|
|
@ -24,6 +24,60 @@
|
|||
|
||||
namespace clang {
|
||||
namespace driver {
|
||||
|
||||
/// A class to find a viable CUDA installation
|
||||
class CudaInstallationDetector {
|
||||
private:
|
||||
// Driver used for diagnostics, file-system queries and the resource dir.
const Driver &D;
|
||||
// True once a usable installation has been found; reported by isValid().
bool IsValid = false;
|
||||
// Version of the detected installation; UNKNOWN until one is determined.
CudaVersion Version = CudaVersion::UNKNOWN;
|
||||
// Root of the detected installation and the directories derived from it.
std::string InstallPath;
|
||||
std::string BinPath;
|
||||
// Either <InstallPath>/lib64 or <InstallPath>/lib, chosen during detection.
std::string LibPath;
|
||||
std::string LibDevicePath;
|
||||
std::string IncludePath;
|
||||
// Maps a GPU name to its libdevice file; queried by getLibDeviceFile().
llvm::StringMap<std::string> LibDeviceMap;
|
||||
|
||||
// CUDA architectures for which we have raised an error in
|
||||
// CheckCudaVersionSupportsArch.
|
||||
mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors;
|
||||
|
||||
public:
|
||||
/// \brief Look for a CUDA installation using the given target triple and
/// driver arguments. Detection happens in the constructor; check isValid()
/// afterwards.
CudaInstallationDetector(const Driver &D, const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args);
|
||||
|
||||
/// \brief Append the cc1 include arguments needed for CUDA compilation to
/// CC1Args.
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const;
|
||||
|
||||
/// \brief Emit an error if Version does not support the given Arch.
|
||||
///
|
||||
/// If either Version or Arch is unknown, does not emit an error. Emits at
|
||||
/// most one error per Arch.
|
||||
void CheckCudaVersionSupportsArch(CudaArch Arch) const;
|
||||
|
||||
/// \brief Check whether we detected a valid Cuda install.
|
||||
bool isValid() const { return IsValid; }
|
||||
/// \brief Print information about the detected CUDA installation.
|
||||
void print(raw_ostream &OS) const;
|
||||
|
||||
/// \brief Get the detected Cuda install's version.
|
||||
CudaVersion version() const { return Version; }
|
||||
/// \brief Get the detected Cuda installation path.
|
||||
StringRef getInstallPath() const { return InstallPath; }
|
||||
/// \brief Get the detected path to Cuda's bin directory.
|
||||
StringRef getBinPath() const { return BinPath; }
|
||||
/// \brief Get the detected Cuda Include path.
|
||||
StringRef getIncludePath() const { return IncludePath; }
|
||||
/// \brief Get the detected Cuda library path.
|
||||
StringRef getLibPath() const { return LibPath; }
|
||||
/// \brief Get the detected Cuda device library path.
|
||||
StringRef getLibDevicePath() const { return LibDevicePath; }
|
||||
/// \brief Get libdevice file for given architecture
|
||||
std::string getLibDeviceFile(StringRef Gpu) const {
|
||||
return LibDeviceMap.lookup(Gpu);
|
||||
}
|
||||
};
|
||||
|
||||
namespace toolchains {
|
||||
|
||||
/// Generic_GCC - A tool chain using the 'gcc' command to perform
|
||||
|
@ -157,57 +211,6 @@ public:
|
|||
|
||||
protected:
|
||||
GCCInstallationDetector GCCInstallation;
|
||||
|
||||
// \brief A class to find a viable CUDA installation
|
||||
class CudaInstallationDetector {
|
||||
private:
|
||||
const Driver &D;
|
||||
bool IsValid = false;
|
||||
CudaVersion Version = CudaVersion::UNKNOWN;
|
||||
std::string InstallPath;
|
||||
std::string BinPath;
|
||||
std::string LibPath;
|
||||
std::string LibDevicePath;
|
||||
std::string IncludePath;
|
||||
llvm::StringMap<std::string> LibDeviceMap;
|
||||
|
||||
// CUDA architectures for which we have raised an error in
|
||||
// CheckCudaVersionSupportsArch.
|
||||
mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors;
|
||||
|
||||
public:
|
||||
CudaInstallationDetector(const Driver &D) : D(D) {}
|
||||
void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args);
|
||||
|
||||
/// \brief Emit an error if Version does not support the given Arch.
|
||||
///
|
||||
/// If either Version or Arch is unknown, does not emit an error. Emits at
|
||||
/// most one error per Arch.
|
||||
void CheckCudaVersionSupportsArch(CudaArch Arch) const;
|
||||
|
||||
/// \brief Check whether we detected a valid Cuda install.
|
||||
bool isValid() const { return IsValid; }
|
||||
/// \brief Print information about the detected CUDA installation.
|
||||
void print(raw_ostream &OS) const;
|
||||
|
||||
/// \brief Get the detected Cuda install's version.
|
||||
CudaVersion version() const { return Version; }
|
||||
/// \brief Get the detected Cuda installation path.
|
||||
StringRef getInstallPath() const { return InstallPath; }
|
||||
/// \brief Get the detected path to Cuda's bin directory.
|
||||
StringRef getBinPath() const { return BinPath; }
|
||||
/// \brief Get the detected Cuda Include path.
|
||||
StringRef getIncludePath() const { return IncludePath; }
|
||||
/// \brief Get the detected Cuda library path.
|
||||
StringRef getLibPath() const { return LibPath; }
|
||||
/// \brief Get the detected Cuda device library path.
|
||||
StringRef getLibDevicePath() const { return LibDevicePath; }
|
||||
/// \brief Get libdevice file for given architecture
|
||||
std::string getLibDeviceFile(StringRef Gpu) const {
|
||||
return LibDeviceMap.lookup(Gpu);
|
||||
}
|
||||
};
|
||||
|
||||
CudaInstallationDetector CudaInstallation;
|
||||
|
||||
public:
|
||||
|
@ -403,6 +406,8 @@ public:
|
|||
/// The OS version we are targeting.
|
||||
mutable VersionTuple TargetVersion;
|
||||
|
||||
CudaInstallationDetector CudaInstallation;
|
||||
|
||||
private:
|
||||
void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const;
|
||||
|
||||
|
@ -543,6 +548,9 @@ public:
|
|||
ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override;
|
||||
bool hasBlocksRuntime() const override;
|
||||
|
||||
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const override;
|
||||
|
||||
bool UseObjCMixedDispatch() const override {
|
||||
// This is only used with the non-fragile ABI and non-legacy dispatch.
|
||||
|
||||
|
@ -572,6 +580,8 @@ public:
|
|||
bool SupportsEmbeddedBitcode() const override;
|
||||
|
||||
SanitizerMask getSupportedSanitizers() const override;
|
||||
|
||||
void printVerboseInfo(raw_ostream &OS) const override;
|
||||
};
|
||||
|
||||
/// DarwinClang - The Darwin toolchain used by Clang.
|
||||
|
@ -867,10 +877,10 @@ protected:
|
|||
Tool *buildLinker() const override;
|
||||
};
|
||||
|
||||
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux {
|
||||
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
|
||||
public:
|
||||
CudaToolChain(const Driver &D, const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args);
|
||||
const ToolChain &HostTC, const llvm::opt::ArgList &Args);
|
||||
|
||||
llvm::opt::DerivedArgList *
|
||||
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
|
||||
|
@ -881,16 +891,29 @@ public:
|
|||
// Never try to use the integrated assembler with CUDA; always fork out to
|
||||
// ptxas.
|
||||
bool useIntegratedAs() const override { return false; }
|
||||
bool isCrossCompiling() const override { return true; }
|
||||
bool isPICDefault() const override { return false; }
|
||||
bool isPIEDefault() const override { return false; }
|
||||
bool isPICDefaultForced() const override { return false; }
|
||||
bool SupportsProfiling() const override { return false; }
|
||||
bool SupportsObjCGC() const override { return false; }
|
||||
|
||||
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const override;
|
||||
|
||||
const Generic_GCC::CudaInstallationDetector &cudaInstallation() const {
|
||||
return CudaInstallation;
|
||||
}
|
||||
Generic_GCC::CudaInstallationDetector &cudaInstallation() {
|
||||
return CudaInstallation;
|
||||
}
|
||||
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
|
||||
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
|
||||
void
|
||||
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const override;
|
||||
void AddClangCXXStdlibIncludeArgs(
|
||||
const llvm::opt::ArgList &Args,
|
||||
llvm::opt::ArgStringList &CC1Args) const override;
|
||||
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const override;
|
||||
|
||||
const ToolChain &HostTC;
|
||||
CudaInstallationDetector CudaInstallation;
|
||||
|
||||
protected:
|
||||
Tool *buildAssembler() const override; // ptxas
|
||||
|
|
|
@ -11981,7 +11981,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
|
||||
// Check that our installation's ptxas supports gpu_arch.
|
||||
if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
|
||||
TC.cudaInstallation().CheckCudaVersionSupportsArch(gpu_arch);
|
||||
TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
|
||||
}
|
||||
|
||||
ArgStringList CmdArgs;
|
||||
|
|
|
@ -5,10 +5,18 @@
|
|||
// # Check that we properly detect CUDA installation.
|
||||
// RUN: %clang -v --target=i386-unknown-linux \
|
||||
// RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
|
||||
// RUN: %clang -v --target=i386-apple-macosx \
|
||||
// RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
|
||||
|
||||
// RUN: %clang -v --target=i386-unknown-linux \
|
||||
// RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
|
||||
// RUN: %clang -v --target=i386-apple-macosx \
|
||||
// RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -v --target=i386-unknown-linux \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
|
||||
// RUN: %clang -v --target=i386-apple-macosx \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
|
||||
|
||||
// Make sure we map libdevice bitcode files to proper GPUs. These
|
||||
// tests use Inputs/CUDA_80 which has full set of libdevice files.
|
||||
|
@ -51,33 +59,51 @@
|
|||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE50
|
||||
|
||||
|
||||
// Verify that -nocudainc prevents adding include path to CUDA headers.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
|
||||
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
|
||||
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
|
||||
|
||||
// We should not add any CUDA include paths if there's no valid CUDA installation
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
|
||||
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
|
||||
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
|
||||
|
||||
// Verify that we get an error if there's no libdevice library to link with.
|
||||
// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_20 for this purpose.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_20 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
|
||||
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_20 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
|
||||
|
||||
// Verify that -nocudalib prevents linking libdevice bitcode in.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
|
||||
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
|
||||
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
|
||||
|
||||
// Verify that we don't add include paths, link with libdevice or
|
||||
// -include __clang_cuda_runtime_wrapper.h without valid CUDA installation.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix NOCUDAINC -check-prefix NOLIBDEVICE
|
||||
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
|
||||
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix NOCUDAINC -check-prefix NOLIBDEVICE
|
||||
|
||||
// Verify that C++ include paths are passed for both host and device frontends.
|
||||
// RUN: %clang -### -no-canonical-prefixes -target x86_64-linux-gnu %s \
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// Tests that ptxas and fatbinary are correctly during CUDA compilation.
|
||||
// Tests that ptxas and fatbinary are invoked correctly during CUDA
|
||||
// compilation.
|
||||
//
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
|
@ -56,6 +57,14 @@
|
|||
// RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \
|
||||
// RUN: -check-prefix FATBINARY-EXTRA %s
|
||||
|
||||
// MacOS spot-checks
|
||||
// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
|
||||
// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
|
||||
// RUN: %clang -### -target x86_32-apple-macosx -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
|
||||
|
||||
// Match clang job that produces PTX assembly.
|
||||
// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda"
|
||||
// SM20: "-target-cpu" "sm_20"
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
//
|
||||
// RUN: %clang -v --target=i386-apple-macosx \
|
||||
// RUN: --sysroot=%S/Inputs/CUDA-macosx 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-macosx/usr/local/cuda
|
Loading…
Reference in New Issue