[Driver] Driver changes to support CUDA compilation on Windows.

Summary: For the most part this is straightforward: Just add a CudaInstallation object to the MSVC and MinGW toolchains. CudaToolChain has to override computeMSVCVersion so that Clang::constructJob passes the right version flag to cc1. We have to modify IsWindowsMSVC and friends in Clang::constructJob to be true when compiling CUDA device code on Windows for the same reason. Depends on: D28319 Reviewers: tra Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D28320 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@291131 91177308-0d34-0410-b5e6-96231b3b80d8
2017-01-05 16:52:29 +00:00 · 2017-01-05 16:52:29 +00:00 · c04b67446e
parent c3d7a36177
commit c04b67446e
12 changed files with 98 additions and 15 deletions
--- a/include/clang/Driver/ToolChain.h
+++ b/include/clang/Driver/ToolChain.h
@ -139,6 +139,13 @@ public:
  vfs::FileSystem &getVFS() const;
  const llvm::Triple &getTriple() const { return Triple; }

+  /// Get the toolchain's aux triple, if it has one.
+  ///
+  /// Exactly what the aux triple represents depends on the toolchain, but for
+  /// example when compiling CUDA code for the GPU, the triple might be NVPTX,
+  /// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu.
+  virtual const llvm::Triple *getAuxTriple() const { return nullptr; }
+
  llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
  StringRef getArchName() const { return Triple.getArchName(); }
  StringRef getPlatform() const { return Triple.getVendorName(); }
--- a/lib/Driver/MSVCToolChain.cpp
+++ b/lib/Driver/MSVCToolChain.cpp
@ -47,9 +47,9 @@ using namespace clang::driver::toolchains;
 using namespace clang;
 using namespace llvm::opt;

-MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple& Triple,
+MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ArgList &Args)
-  : ToolChain(D, Triple, Args) {
+    : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) {
  getProgramPaths().push_back(getDriver().getInstalledDir());
  if (getDriver().getInstalledDir() != getDriver().Dir)
    getProgramPaths().push_back(getDriver().Dir);
@ -94,6 +94,15 @@ bool MSVCToolChain::isPICDefaultForced() const {
  return getArch() == llvm::Triple::x86_64;
 }

+void MSVCToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
+                                       ArgStringList &CC1Args) const {
+  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
+}
+
+void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const {
+  CudaInstallation.print(OS);
+}
+
 #ifdef USE_WIN32
 static bool readFullStringValue(HKEY hkey, const char *valueName,
                                std::string &value) {
--- a/lib/Driver/MinGWToolChain.cpp
+++ b/lib/Driver/MinGWToolChain.cpp
@ -63,7 +63,7 @@ void MinGW::findGccLibDir() {
 }

 MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
-    : ToolChain(D, Triple, Args) {
+    : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) {
  getProgramPaths().push_back(getDriver().getInstalledDir());

 // In Windows there aren't any standard install locations, we search
@ -135,6 +135,15 @@ bool MinGW::UseSEHExceptions() const {
  return getArch() == llvm::Triple::x86_64;
 }

+void MinGW::AddCudaIncludeArgs(const ArgList &DriverArgs,
+                               ArgStringList &CC1Args) const {
+  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
+}
+
+void MinGW::printVerboseInfo(raw_ostream &OS) const {
+  CudaInstallation.print(OS);
+}
+
 // Include directories for various hosts:

 // Windows, mingw.org
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@ -1810,14 +1810,21 @@ CudaInstallationDetector::CudaInstallationDetector(
    : D(D) {
  SmallVector<std::string, 4> CudaPathCandidates;

-  if (Args.hasArg(options::OPT_cuda_path_EQ))
+  // In decreasing order so we prefer newer versions to older versions.
+  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
+
+  if (Args.hasArg(options::OPT_cuda_path_EQ)) {
    CudaPathCandidates.push_back(
        Args.getLastArgValue(options::OPT_cuda_path_EQ));
-  else {
+  } else if (HostTriple.isOSWindows()) {
+    for (const char *Ver : Versions)
+      CudaPathCandidates.push_back(
+          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
+          Ver);
+  } else {
    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
-    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0");
-    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.5");
-    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.0");
+    for (const char *Ver : Versions)
+      CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
  }

  for (const auto &CudaPath : CudaPathCandidates) {
@ -5021,6 +5028,11 @@ SanitizerMask CudaToolChain::getSupportedSanitizers() const {
  return HostTC.getSupportedSanitizers();
 }

+VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
+                                               const ArgList &Args) const {
+  return HostTC.computeMSVCVersion(D, Args);
+}
+
 /// XCore tool chain
 XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple,
                               const ArgList &Args)
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@ -709,12 +709,19 @@ public:
      const llvm::opt::ArgList &DriverArgs,
      llvm::opt::ArgStringList &CC1Args) const override;

+  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args) const override;
+
+  void printVerboseInfo(raw_ostream &OS) const override;
+
 protected:
  Tool *getTool(Action::ActionClass AC) const override;
  Tool *buildLinker() const override;
  Tool *buildAssembler() const override;

 private:
+  CudaInstallationDetector CudaInstallation;
+
  std::string Base;
  std::string GccLibDir;
  std::string Ver;
@ -892,6 +899,10 @@ public:
  CudaToolChain(const Driver &D, const llvm::Triple &Triple,
                const ToolChain &HostTC, const llvm::opt::ArgList &Args);

+  virtual const llvm::Triple *getAuxTriple() const override {
+    return &HostTC.getTriple();
+  }
+
  llvm::opt::DerivedArgList *
  TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
                Action::OffloadKind DeviceOffloadKind) const override;
@ -924,6 +935,10 @@ public:

  SanitizerMask getSupportedSanitizers() const override;

+  VersionTuple
+  computeMSVCVersion(const Driver *D,
+                     const llvm::opt::ArgList &Args) const override;
+
  const ToolChain &HostTC;
  CudaInstallationDetector CudaInstallation;

@ -1147,6 +1162,9 @@ public:
      const llvm::opt::ArgList &DriverArgs,
      llvm::opt::ArgStringList &CC1Args) const override;

+  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args) const override;
+
  bool getWindowsSDKDir(std::string &path, int &major,
                        std::string &windowsSDKIncludeVersion,
                        std::string &windowsSDKLibVersion) const;
@ -1166,6 +1184,8 @@ public:
                                          types::ID InputType) const override;
  SanitizerMask getSupportedSanitizers() const override;

+  void printVerboseInfo(raw_ostream &OS) const override;
+
 protected:
  void AddSystemIncludeWithSubfolder(const llvm::opt::ArgList &DriverArgs,
                                     llvm::opt::ArgStringList &CC1Args,
@ -1179,6 +1199,8 @@ protected:
 private:
  VersionTuple getMSVCVersionFromTriple() const;
  VersionTuple getMSVCVersionFromExe() const;
+
+  CudaInstallationDetector CudaInstallation;
 };

 class LLVM_LIBRARY_VISIBILITY CrossWindowsToolChain : public Generic_GCC {
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@ -4086,13 +4086,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

-  bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment();
-  bool IsWindowsCygnus =
-      getToolChain().getTriple().isWindowsCygwinEnvironment();
-  bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
-  bool IsPS4CPU = getToolChain().getTriple().isPS4CPU();
-  bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
-
  // Check number of inputs for sanity. We need at least one input.
  assert(Inputs.size() >= 1 && "Must have at least one input.");
  const InputInfo &Input = Inputs[0];
@ -4106,6 +4099,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
          Inputs.size() == 1) &&
         "Unable to handle multiple inputs.");

+  bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment();
+  bool IsWindowsCygnus =
+      getToolChain().getTriple().isWindowsCygwinEnvironment();
+  bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
+  bool IsPS4CPU = getToolChain().getTriple().isPS4CPU();
+  bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
+
+  // Adjust IsWindowsXYZ for CUDA compilations.  Even when compiling in device
+  // mode (i.e., getToolchain().getTriple() is NVPTX, not Windows), we need to
+  // pass Windows-specific flags to cc1.
+  if (IsCuda) {
+    const llvm::Triple *AuxTriple = getToolChain().getAuxTriple();
+    IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment();
+    IsWindowsGNU |= AuxTriple && AuxTriple->isWindowsGNUEnvironment();
+    IsWindowsCygnus |= AuxTriple && AuxTriple->isWindowsCygwinEnvironment();
+  }
+
  // C++ is not supported for IAMCU.
  if (IsIAMCU && types::isCXX(Input.getType()))
    D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU";
--- a/test/Driver/Inputs/CUDA-windows/Program
+++ b/test/Driver/Inputs/CUDA-windows/Program
--- a/test/Driver/Inputs/CUDA-windows/Program
+++ b/test/Driver/Inputs/CUDA-windows/Program
--- a/test/Driver/Inputs/CUDA-windows/Program
+++ b/test/Driver/Inputs/CUDA-windows/Program
--- a/Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_30.10.bc
+++ b/Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_30.10.bc
--- a/Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_35.10.bc
+++ b/Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_35.10.bc
--- a/test/Driver/cuda-windows.cu
+++ b/test/Driver/cuda-windows.cu
@ -0,0 +1,14 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+//
+// RUN: %clang -v --target=i386-pc-windows-msvc \
+// RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
+// RUN: %clang -v --target=i386-pc-windows-mingw32 \
+// RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
+
+// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0
+// CHECK: "-cc1" "-triple" "nvptx-nvidia-cuda"
+// CHECK-SAME: "-fms-extensions"
+// CHECK-SAME: "-fms-compatibility"
+// CHECK-SAME: "-fms-compatibility-version=