[CUDA] Driver changes to support CUDA compilation on MacOS.

Summary:
Compiling CUDA device code requires us to know the host toolchain,
because CUDA device-side compiles pull in e.g. host headers.

When we only supported Linux compilation, this worked because
CudaToolChain, which is responsible for device-side CUDA compilation,
inherited from the Linux toolchain.  But in order to support MacOS,
CudaToolChain needs to take the host toolchain as a constructor argument
(a `const ToolChain &HostTC`) and delegate to it instead of inheriting.

Because a CUDA toolchain now requires a host TC, we will no longer
create a CUDA toolchain from Driver::getToolChain -- you have to go
through CreateOffloadingDeviceToolChains.  I am *pretty* sure this is
correct, and that previously any attempt to create a CUDA toolchain
through getToolChain() would eventually have resulted in us throwing
"error: unsupported use of NVPTX for host compilation".

In any case hacking getToolChain to create a CUDA+host toolchain would
be wrong, because a Driver can be reused for multiple compilations,
potentially with different host TCs, and getToolChain will cache the
result, causing us to potentially use a stale host TC.

So that's the main change in this patch.

In addition, we have to pull CudaInstallationDetector out of Generic_GCC
and into a top-level class.  It's now used by the Generic_GCC, Darwin
(MachO-derived) and CUDA toolchains.

Reviewers: tra

Subscribers: rryan, hfinkel, sfantao

Differential Revision: https://reviews.llvm.org/D26774

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@287285 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Lebar 2016-11-18 00:41:22 +00:00
parent c2d61705d6
commit 29df057cd3
13 changed files with 247 additions and 113 deletions

View File

@ -38,6 +38,7 @@ class FileSystem;
namespace driver {
class Compilation;
class CudaInstallationDetector;
class Driver;
class JobAction;
class RegisterEffectiveTriple;

View File

@ -473,14 +473,18 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
})) {
const ToolChain &TC = getToolChain(
C.getInputArgs(),
llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.isArch64Bit()
? "nvptx64-nvidia-cuda"
: "nvptx-nvidia-cuda"));
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda"
: "nvptx-nvidia-cuda");
// Use the CUDA and host triples as the key into the ToolChains map, because
// the device toolchain we create depends on both.
ToolChain *&CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
if (!CudaTC) {
CudaTC = new toolchains::CudaToolChain(*this, CudaTriple, *HostTC,
C.getInputArgs());
}
C.addOffloadDeviceToolChain(CudaTC, Action::OFK_Cuda);
}
//
@ -3717,9 +3721,6 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
break;
}
break;
case llvm::Triple::CUDA:
TC = new toolchains::CudaToolChain(*this, Target, Args);
break;
case llvm::Triple::PS4:
TC = new toolchains::PS4CPU(*this, Target, Args);
break;
@ -3761,6 +3762,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
}
}
}
// Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
// compiles always need two toolchains, the CUDA toolchain and the host
// toolchain. So the only valid way to create a CUDA toolchain is via
// CreateOffloadingDeviceToolChains.
return *TC;
}

View File

@ -52,7 +52,8 @@ MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
/// Darwin - Darwin tool chain for i386 and x86_64.
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: MachO(D, Triple, Args), TargetInitialized(false) {}
: MachO(D, Triple, Args), TargetInitialized(false),
CudaInstallation(D, Triple, Args) {}
types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
types::ID Ty = types::lookupTypeForExtension(Ext);
@ -99,6 +100,11 @@ bool Darwin::hasBlocksRuntime() const {
}
}
/// Add the CUDA include arguments for a Darwin host compilation.
///
/// Delegates to this toolchain's CudaInstallation detector, which appends the
/// relevant arguments to \p CC1Args based on \p DriverArgs.
void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
// This is just a MachO name translation routine and there's no
// way to join this into ARMTargetParser without breaking all
// other assumptions. Maybe MachO should consider standardising
@ -1296,6 +1302,10 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
return Res;
}
/// Print verbose toolchain information to \p OS.
///
/// For Darwin this forwards to CudaInstallation.print(), which reports the
/// detected CUDA installation when one is valid.
void Darwin::printVerboseInfo(raw_ostream &OS) const {
CudaInstallation.print(OS);
}
/// Generic_GCC - A tool chain using the 'gcc' command to perform
/// all subcommands; this relies on gcc translating the majority of
/// command line options.
@ -1811,10 +1821,10 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
return CudaVersion::UNKNOWN;
}
// \brief -- try common CUDA installation paths looking for files we need for
// CUDA compilation.
void Generic_GCC::CudaInstallationDetector::init(
const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args) {
CudaInstallationDetector::CudaInstallationDetector(
const Driver &D, const llvm::Triple &TargetTriple,
const llvm::opt::ArgList &Args)
: D(D) {
SmallVector<std::string, 4> CudaPathCandidates;
if (Args.hasArg(options::OPT_cuda_path_EQ))
@ -1835,13 +1845,25 @@ void Generic_GCC::CudaInstallationDetector::init(
BinPath = CudaPath + "/bin";
IncludePath = InstallPath + "/include";
LibDevicePath = InstallPath + "/nvvm/libdevice";
LibPath = InstallPath + (TargetTriple.isArch64Bit() ? "/lib64" : "/lib");
auto &FS = D.getVFS();
if (!(FS.exists(IncludePath) && FS.exists(BinPath) && FS.exists(LibPath) &&
if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
FS.exists(LibDevicePath)))
continue;
// On Linux, we have both lib and lib64 directories, and we need to choose
// based on our triple. On MacOS, we have only a lib directory.
//
// It's sufficient for our purposes to be flexible: If both lib and lib64
// exist, we choose whichever one matches our triple. Otherwise, if only
// lib exists, we use it.
if (TargetTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
LibPath = InstallPath + "/lib64";
else if (FS.exists(InstallPath + "/lib"))
LibPath = InstallPath + "/lib";
else
continue;
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
FS.getBufferForFile(InstallPath + "/version.txt");
if (!VersionFile) {
@ -1898,7 +1920,33 @@ void Generic_GCC::CudaInstallationDetector::init(
}
}
void Generic_GCC::CudaInstallationDetector::CheckCudaVersionSupportsArch(
void CudaInstallationDetector::AddCudaIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// Add cuda_wrappers/* to our system include path. This lets us wrap
// standard library headers.
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
llvm::sys::path::append(P, "cuda_wrappers");
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(P));
}
if (DriverArgs.hasArg(options::OPT_nocudainc))
return;
if (!isValid()) {
D.Diag(diag::err_drv_no_cuda_installation);
return;
}
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
CC1Args.push_back("-include");
CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
}
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
CudaArch Arch) const {
if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
ArchsWithVersionTooLowErrors.count(Arch) > 0)
@ -1913,7 +1961,7 @@ void Generic_GCC::CudaInstallationDetector::CheckCudaVersionSupportsArch(
}
}
void Generic_GCC::CudaInstallationDetector::print(raw_ostream &OS) const {
void CudaInstallationDetector::print(raw_ostream &OS) const {
if (isValid())
OS << "Found CUDA installation: " << InstallPath << ", version "
<< CudaVersionToString(Version) << "\n";
@ -2756,7 +2804,8 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), GCCInstallation(D), CudaInstallation(D) {
: ToolChain(D, Triple, Args), GCCInstallation(D),
CudaInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
@ -4162,7 +4211,6 @@ static void addMultilibsFilePaths(const Driver &D, const MultilibSet &Multilibs,
Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
GCCInstallation.init(Triple, Args);
CudaInstallation.init(Triple, Args);
Multilibs = GCCInstallation.getMultilibs();
llvm::Triple::ArchType Arch = Triple.getArch();
std::string SysRoot = computeSysRoot();
@ -4767,26 +4815,7 @@ void Linux::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// Add cuda_wrappers/* to our system include path. This lets us wrap
// standard library headers.
SmallString<128> P(getDriver().ResourceDir);
llvm::sys::path::append(P, "include");
llvm::sys::path::append(P, "cuda_wrappers");
addSystemInclude(DriverArgs, CC1Args, P);
}
if (DriverArgs.hasArg(options::OPT_nocudainc))
return;
if (!CudaInstallation.isValid()) {
getDriver().Diag(diag::err_drv_no_cuda_installation);
return;
}
addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath());
CC1Args.push_back("-include");
CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
@ -4968,16 +4997,18 @@ Tool *DragonFly::buildLinker() const {
/// together object files from the assembler into a single blob.
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Linux(D, Triple, Args) {
const ToolChain &HostTC, const ArgList &Args)
: ToolChain(D, Triple, Args), HostTC(HostTC),
CudaInstallation(D, Triple, Args) {
if (CudaInstallation.isValid())
getProgramPaths().push_back(CudaInstallation.getBinPath());
}
void
CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
Linux::addClangTargetOptions(DriverArgs, CC1Args);
void CudaToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args);
CC1Args.push_back("-fcuda-is-device");
if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
@ -5019,13 +5050,18 @@ void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
assert(!Arch.empty() && "Must have an explicit GPU arch.");
CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
}
Linux::AddCudaIncludeArgs(DriverArgs, CC1Args);
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch, Action::OffloadKind) const {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
for (Arg *A : Args) {
@ -5077,6 +5113,30 @@ Tool *CudaToolChain::buildLinker() const {
return new tools::NVPTX::Linker(*this);
}
/// Forward to the host toolchain so that the device-side compile receives the
/// same warning options the host toolchain adds to \p CC1Args.
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
}
/// Return whatever C++ standard library type the host toolchain selects for
/// \p Args; the CUDA toolchain makes no choice of its own here.
ToolChain::CXXStdlibType
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
return HostTC.GetCXXStdlibType(Args);
}
/// Forward to the host toolchain, which appends its system include arguments
/// to \p CC1Args for the device-side compile.
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}
/// Forward to the host toolchain, which appends its C++ standard library
/// include arguments to \p CC1Args for the device-side compile.
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
}
/// Forward to the host toolchain's AddIAMCUIncludeArgs; the CUDA toolchain
/// adds nothing of its own here.
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
/// XCore tool chain
XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)

View File

@ -24,6 +24,60 @@
namespace clang {
namespace driver {
/// A class to find a viable CUDA installation.
///
/// Detection is performed by the constructor. Callers check isValid() before
/// using the result (e.g. before adding getBinPath() to the program paths).
// NOTE(review): the path members are assigned while candidates are being
// probed, so the path accessors are presumably only meaningful when isValid()
// returns true -- confirm against the full constructor.
class CudaInstallationDetector {
private:
// Driver used for VFS access, the resource directory and diagnostics.
const Driver &D;
bool IsValid = false;
CudaVersion Version = CudaVersion::UNKNOWN;
std::string InstallPath;
std::string BinPath;
std::string LibPath;
std::string LibDevicePath;
std::string IncludePath;
llvm::StringMap<std::string> LibDeviceMap;
// CUDA architectures for which we have raised an error in
// CheckCudaVersionSupportsArch. Mutable because that check is a const member.
mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors;
public:
/// \brief Probe candidate paths for a CUDA installation.
///
/// Consults the --cuda-path= option in \p Args when it is present. The
/// bitness of \p Triple selects lib64 over lib when both directories exist.
CudaInstallationDetector(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
/// \brief Append the cc1 include arguments needed for CUDA compilation.
///
/// Honors -nobuiltininc and -nocudainc in \p DriverArgs, and diagnoses a
/// missing installation when the CUDA headers are wanted but isValid() is
/// false.
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
/// \brief Emit an error if Version does not support the given Arch.
///
/// If either Version or Arch is unknown, does not emit an error. Emits at
/// most one error per Arch.
void CheckCudaVersionSupportsArch(CudaArch Arch) const;
/// \brief Check whether we detected a valid Cuda install.
bool isValid() const { return IsValid; }
/// \brief Print information about the detected CUDA installation.
void print(raw_ostream &OS) const;
/// \brief Get the detected Cuda install's version.
CudaVersion version() const { return Version; }
/// \brief Get the detected Cuda installation path.
StringRef getInstallPath() const { return InstallPath; }
/// \brief Get the detected path to Cuda's bin directory.
StringRef getBinPath() const { return BinPath; }
/// \brief Get the detected Cuda Include path.
StringRef getIncludePath() const { return IncludePath; }
/// \brief Get the detected Cuda library path.
StringRef getLibPath() const { return LibPath; }
/// \brief Get the detected Cuda device library path.
StringRef getLibDevicePath() const { return LibDevicePath; }
/// \brief Get libdevice file for given architecture
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
};
namespace toolchains {
/// Generic_GCC - A tool chain using the 'gcc' command to perform
@ -157,57 +211,6 @@ public:
protected:
GCCInstallationDetector GCCInstallation;
// \brief A class to find a viable CUDA installation
class CudaInstallationDetector {
private:
const Driver &D;
bool IsValid = false;
CudaVersion Version = CudaVersion::UNKNOWN;
std::string InstallPath;
std::string BinPath;
std::string LibPath;
std::string LibDevicePath;
std::string IncludePath;
llvm::StringMap<std::string> LibDeviceMap;
// CUDA architectures for which we have raised an error in
// CheckCudaVersionSupportsArch.
mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors;
public:
CudaInstallationDetector(const Driver &D) : D(D) {}
void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args);
/// \brief Emit an error if Version does not support the given Arch.
///
/// If either Version or Arch is unknown, does not emit an error. Emits at
/// most one error per Arch.
void CheckCudaVersionSupportsArch(CudaArch Arch) const;
/// \brief Check whether we detected a valid Cuda install.
bool isValid() const { return IsValid; }
/// \brief Print information about the detected CUDA installation.
void print(raw_ostream &OS) const;
/// \brief Get the detected Cuda install's version.
CudaVersion version() const { return Version; }
/// \brief Get the detected Cuda installation path.
StringRef getInstallPath() const { return InstallPath; }
/// \brief Get the detected path to Cuda's bin directory.
StringRef getBinPath() const { return BinPath; }
/// \brief Get the detected Cuda Include path.
StringRef getIncludePath() const { return IncludePath; }
/// \brief Get the detected Cuda library path.
StringRef getLibPath() const { return LibPath; }
/// \brief Get the detected Cuda device library path.
StringRef getLibDevicePath() const { return LibDevicePath; }
/// \brief Get libdevice file for given architecture
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
};
CudaInstallationDetector CudaInstallation;
public:
@ -403,6 +406,8 @@ public:
/// The OS version we are targeting.
mutable VersionTuple TargetVersion;
CudaInstallationDetector CudaInstallation;
private:
void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const;
@ -543,6 +548,9 @@ public:
ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override;
bool hasBlocksRuntime() const override;
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
bool UseObjCMixedDispatch() const override {
// This is only used with the non-fragile ABI and non-legacy dispatch.
@ -572,6 +580,8 @@ public:
bool SupportsEmbeddedBitcode() const override;
SanitizerMask getSupportedSanitizers() const override;
void printVerboseInfo(raw_ostream &OS) const override;
};
/// DarwinClang - The Darwin toolchain used by Clang.
@ -867,10 +877,10 @@ protected:
Tool *buildLinker() const override;
};
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux {
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
public:
CudaToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
const ToolChain &HostTC, const llvm::opt::ArgList &Args);
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
@ -881,16 +891,29 @@ public:
// Never try to use the integrated assembler with CUDA; always fork out to
// ptxas.
bool useIntegratedAs() const override { return false; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
bool isPIEDefault() const override { return false; }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
bool SupportsObjCGC() const override { return false; }
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
const Generic_GCC::CudaInstallationDetector &cudaInstallation() const {
return CudaInstallation;
}
Generic_GCC::CudaInstallationDetector &cudaInstallation() {
return CudaInstallation;
}
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddClangCXXStdlibIncludeArgs(
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CC1Args) const override;
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
const ToolChain &HostTC;
CudaInstallationDetector CudaInstallation;
protected:
Tool *buildAssembler() const override; // ptxas

View File

@ -11981,7 +11981,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
// Check that our installation's ptxas supports gpu_arch.
if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
TC.cudaInstallation().CheckCudaVersionSupportsArch(gpu_arch);
TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
}
ArgStringList CmdArgs;

View File

@ -5,10 +5,18 @@
// # Check that we properly detect CUDA installation.
// RUN: %clang -v --target=i386-unknown-linux \
// RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
// RUN: %clang -v --target=i386-apple-macosx \
// RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
// RUN: %clang -v --target=i386-unknown-linux \
// RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
// RUN: %clang -v --target=i386-apple-macosx \
// RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
// RUN: %clang -v --target=i386-unknown-linux \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
// RUN: %clang -v --target=i386-apple-macosx \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
// Make sure we map libdevice bitcode files to proper GPUs. These
// tests use Inputs/CUDA_80 which has full set of libdevice files.
@ -51,33 +59,51 @@
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE50
// Verify that -nocudainc prevents adding include path to CUDA headers.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// We should not add any CUDA include paths if there's no valid CUDA installation
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
// Verify that we get an error if there's no libdevice library to link with.
// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_20 for this purpose.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_20 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_20 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
// Verify that -nocudalib prevents linking libdevice bitcode in.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
// Verify that we don't add include paths, link with libdevice or
// -include __clang_cuda_runtime_wrapper.h without valid CUDA installation.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix NOCUDAINC -check-prefix NOLIBDEVICE
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix NOCUDAINC -check-prefix NOLIBDEVICE
// Verify that C++ include paths are passed for both host and device frontends.
// RUN: %clang -### -no-canonical-prefixes -target x86_64-linux-gnu %s \

View File

@ -1,4 +1,5 @@
// Tests that ptxas and fatbinary are correctly during CUDA compilation.
// Tests that ptxas and fatbinary are invoked correctly during CUDA
// compilation.
//
// REQUIRES: clang-driver
// REQUIRES: x86-registered-target
@ -56,6 +57,14 @@
// RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \
// RUN: -check-prefix FATBINARY-EXTRA %s
// MacOS spot-checks
// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
// RUN: %clang -### -target x86_32-apple-macosx -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
// Match clang job that produces PTX assembly.
// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// SM20: "-target-cpu" "sm_20"

View File

@ -0,0 +1,8 @@
// REQUIRES: clang-driver
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
//
// RUN: %clang -v --target=i386-apple-macosx \
// RUN: --sysroot=%S/Inputs/CUDA-macosx 2>&1 | FileCheck %s
// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-macosx/usr/local/cuda